1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=GCN-NOHSA-SI %s 3; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck --check-prefix=GCN-HSA %s 4; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=GCN-NOHSA-VI %s 5; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck --check-prefix=EG %s 6 7define amdgpu_kernel void @constant_load_i16(i16 addrspace(1)* %out, i16 addrspace(4)* %in) { 8; GCN-NOHSA-SI-LABEL: constant_load_i16: 9; GCN-NOHSA-SI: ; %bb.0: ; %entry 10; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 11; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 12; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 13; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 14; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 15; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 16; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 17; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 18; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 19; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 20; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 21; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 22; GCN-NOHSA-SI-NEXT: buffer_store_short v0, off, s[4:7], 0 23; GCN-NOHSA-SI-NEXT: s_endpgm 24; 25; GCN-HSA-LABEL: constant_load_i16: 26; GCN-HSA: ; %bb.0: ; %entry 27; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 28; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 29; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 30; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 31; GCN-HSA-NEXT: flat_load_ushort v2, v[0:1] 32; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 33; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 34; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 35; GCN-HSA-NEXT: flat_store_short v[0:1], v2 36; GCN-HSA-NEXT: s_endpgm 37; 38; GCN-NOHSA-VI-LABEL: constant_load_i16: 39; GCN-NOHSA-VI: ; %bb.0: ; %entry 40; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 41; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 42; 
GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 43; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 44; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 45; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 46; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 47; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 48; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 49; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 50; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 51; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 52; GCN-NOHSA-VI-NEXT: buffer_store_short v0, off, s[4:7], 0 53; GCN-NOHSA-VI-NEXT: s_endpgm 54; 55; EG-LABEL: constant_load_i16: 56; EG: ; %bb.0: ; %entry 57; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 58; EG-NEXT: TEX 0 @6 59; EG-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[] 60; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X 61; EG-NEXT: CF_END 62; EG-NEXT: PAD 63; EG-NEXT: Fetch clause starting at 6: 64; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 65; EG-NEXT: ALU clause starting at 8: 66; EG-NEXT: MOV * T0.X, KC0[2].Z, 67; EG-NEXT: ALU clause starting at 9: 68; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x, 69; EG-NEXT: AND_INT * T1.W, T0.X, literal.y, 70; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41) 71; EG-NEXT: LSHL * T0.W, PV.W, literal.x, 72; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) 73; EG-NEXT: LSHL T0.X, T1.W, PV.W, 74; EG-NEXT: LSHL * T0.W, literal.x, PV.W, 75; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 76; EG-NEXT: MOV T0.Y, 0.0, 77; EG-NEXT: MOV * T0.Z, 0.0, 78; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 79; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 80entry: 81 %ld = load i16, i16 addrspace(4)* %in 82 store i16 %ld, i16 addrspace(1)* %out 83 ret void 84} 85 86define amdgpu_kernel void @constant_load_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) { 87; GCN-NOHSA-SI-LABEL: constant_load_v2i16: 88; GCN-NOHSA-SI: ; %bb.0: ; %entry 89; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 90; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 91; GCN-NOHSA-SI-NEXT: s_load_dword s4, s[2:3], 0x0 92; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 93; 
GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 94; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 95; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 96; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 97; GCN-NOHSA-SI-NEXT: s_endpgm 98; 99; GCN-HSA-LABEL: constant_load_v2i16: 100; GCN-HSA: ; %bb.0: ; %entry 101; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 102; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 103; GCN-HSA-NEXT: s_load_dword s2, s[2:3], 0x0 104; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 105; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 106; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 107; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 108; GCN-HSA-NEXT: flat_store_dword v[0:1], v2 109; GCN-HSA-NEXT: s_endpgm 110; 111; GCN-NOHSA-VI-LABEL: constant_load_v2i16: 112; GCN-NOHSA-VI: ; %bb.0: ; %entry 113; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 114; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 115; GCN-NOHSA-VI-NEXT: s_load_dword s4, s[2:3], 0x0 116; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 117; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 118; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 119; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 120; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 121; GCN-NOHSA-VI-NEXT: s_endpgm 122; 123; EG-LABEL: constant_load_v2i16: 124; EG: ; %bb.0: ; %entry 125; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 126; EG-NEXT: TEX 0 @6 127; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 128; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 129; EG-NEXT: CF_END 130; EG-NEXT: PAD 131; EG-NEXT: Fetch clause starting at 6: 132; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1 133; EG-NEXT: ALU clause starting at 8: 134; EG-NEXT: MOV * T0.X, KC0[2].Z, 135; EG-NEXT: ALU clause starting at 9: 136; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 137; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 138entry: 139 %ld = load <2 x i16>, <2 x i16> addrspace(4)* %in 140 store <2 x i16> %ld, <2 x i16> addrspace(1)* %out 141 ret void 142} 143 144define amdgpu_kernel void @constant_load_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> 
addrspace(4)* %in) { 145; GCN-NOHSA-SI-LABEL: constant_load_v3i16: 146; GCN-NOHSA-SI: ; %bb.0: ; %entry 147; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 148; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 149; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 150; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 151; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 152; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 153; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5 154; GCN-NOHSA-SI-NEXT: buffer_store_short v0, off, s[0:3], 0 offset:4 155; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 156; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 157; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 158; GCN-NOHSA-SI-NEXT: s_endpgm 159; 160; GCN-HSA-LABEL: constant_load_v3i16: 161; GCN-HSA: ; %bb.0: ; %entry 162; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 163; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 164; GCN-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 165; GCN-HSA-NEXT: s_add_u32 s4, s0, 4 166; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 167; GCN-HSA-NEXT: v_mov_b32_e32 v2, s4 168; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 169; GCN-HSA-NEXT: v_mov_b32_e32 v3, s5 170; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 171; GCN-HSA-NEXT: v_mov_b32_e32 v4, s3 172; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 173; GCN-HSA-NEXT: v_mov_b32_e32 v5, s2 174; GCN-HSA-NEXT: flat_store_short v[2:3], v4 175; GCN-HSA-NEXT: flat_store_dword v[0:1], v5 176; GCN-HSA-NEXT: s_endpgm 177; 178; GCN-NOHSA-VI-LABEL: constant_load_v3i16: 179; GCN-NOHSA-VI: ; %bb.0: ; %entry 180; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 181; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 182; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 183; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 184; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 185; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 186; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s5 187; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s4 188; GCN-NOHSA-VI-NEXT: buffer_store_short v0, off, s[0:3], 0 offset:4 189; GCN-NOHSA-VI-NEXT: buffer_store_dword v1, 
off, s[0:3], 0 190; GCN-NOHSA-VI-NEXT: s_endpgm 191; 192; EG-LABEL: constant_load_v3i16: 193; EG: ; %bb.0: ; %entry 194; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 195; EG-NEXT: TEX 2 @6 196; EG-NEXT: ALU 19, @13, KC0[CB0:0-32], KC1[] 197; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.X, T7.X, 0 198; EG-NEXT: MEM_RAT MSKOR T5.XW, T8.X 199; EG-NEXT: CF_END 200; EG-NEXT: Fetch clause starting at 6: 201; EG-NEXT: VTX_READ_16 T6.X, T5.X, 0, #1 202; EG-NEXT: VTX_READ_16 T7.X, T5.X, 2, #1 203; EG-NEXT: VTX_READ_16 T5.X, T5.X, 4, #1 204; EG-NEXT: ALU clause starting at 12: 205; EG-NEXT: MOV * T5.X, KC0[2].Z, 206; EG-NEXT: ALU clause starting at 13: 207; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 208; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) 209; EG-NEXT: AND_INT T1.W, PV.W, literal.x, 210; EG-NEXT: AND_INT * T2.W, T5.X, literal.y, 211; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41) 212; EG-NEXT: LSHL * T1.W, PV.W, literal.x, 213; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) 214; EG-NEXT: LSHL T5.X, T2.W, PV.W, 215; EG-NEXT: LSHL * T5.W, literal.x, PV.W, 216; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 217; EG-NEXT: MOV T5.Y, 0.0, 218; EG-NEXT: MOV * T5.Z, 0.0, 219; EG-NEXT: LSHR T8.X, T0.W, literal.x, 220; EG-NEXT: LSHL T0.W, T7.X, literal.y, 221; EG-NEXT: AND_INT * T1.W, T6.X, literal.z, 222; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 223; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 224; EG-NEXT: OR_INT T6.X, PV.W, PS, 225; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.x, 226; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 227entry: 228 %ld = load <3 x i16>, <3 x i16> addrspace(4)* %in 229 store <3 x i16> %ld, <3 x i16> addrspace(1)* %out 230 ret void 231} 232 233define amdgpu_kernel void @constant_load_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) { 234; GCN-NOHSA-SI-LABEL: constant_load_v4i16: 235; GCN-NOHSA-SI: ; %bb.0: ; %entry 236; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 237; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 238; GCN-NOHSA-SI-NEXT: 
s_load_dwordx2 s[4:5], s[2:3], 0x0 239; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 240; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 241; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 242; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 243; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s5 244; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 245; GCN-NOHSA-SI-NEXT: s_endpgm 246; 247; GCN-HSA-LABEL: constant_load_v4i16: 248; GCN-HSA: ; %bb.0: ; %entry 249; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 250; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 251; GCN-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 252; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 253; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 254; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 255; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 256; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 257; GCN-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 258; GCN-HSA-NEXT: s_endpgm 259; 260; GCN-NOHSA-VI-LABEL: constant_load_v4i16: 261; GCN-NOHSA-VI: ; %bb.0: ; %entry 262; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 263; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 264; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 265; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 266; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 267; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 268; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 269; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s5 270; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 271; GCN-NOHSA-VI-NEXT: s_endpgm 272; 273; EG-LABEL: constant_load_v4i16: 274; EG: ; %bb.0: ; %entry 275; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 276; EG-NEXT: TEX 0 @6 277; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 278; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 279; EG-NEXT: CF_END 280; EG-NEXT: PAD 281; EG-NEXT: Fetch clause starting at 6: 282; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1 283; EG-NEXT: ALU clause starting at 8: 284; EG-NEXT: MOV * T0.X, KC0[2].Z, 285; EG-NEXT: ALU clause starting at 9: 286; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 287; EG-NEXT: 2(2.802597e-45), 
0(0.000000e+00) 288entry: 289 %ld = load <4 x i16>, <4 x i16> addrspace(4)* %in 290 store <4 x i16> %ld, <4 x i16> addrspace(1)* %out 291 ret void 292} 293 294define amdgpu_kernel void @constant_load_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) { 295; GCN-NOHSA-SI-LABEL: constant_load_v8i16: 296; GCN-NOHSA-SI: ; %bb.0: ; %entry 297; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 298; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 299; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 300; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 301; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 302; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 303; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 304; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s5 305; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s6 306; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s7 307; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 308; GCN-NOHSA-SI-NEXT: s_endpgm 309; 310; GCN-HSA-LABEL: constant_load_v8i16: 311; GCN-HSA: ; %bb.0: ; %entry 312; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 313; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 314; GCN-HSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 315; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 316; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 317; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 318; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 319; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 320; GCN-HSA-NEXT: v_mov_b32_e32 v2, s6 321; GCN-HSA-NEXT: v_mov_b32_e32 v3, s7 322; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 323; GCN-HSA-NEXT: s_endpgm 324; 325; GCN-NOHSA-VI-LABEL: constant_load_v8i16: 326; GCN-NOHSA-VI: ; %bb.0: ; %entry 327; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 328; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 329; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 330; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 331; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 332; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 333; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 334; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s5 335; 
GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s6 336; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s7 337; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 338; GCN-NOHSA-VI-NEXT: s_endpgm 339; 340; EG-LABEL: constant_load_v8i16: 341; EG: ; %bb.0: ; %entry 342; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 343; EG-NEXT: TEX 0 @6 344; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 345; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1 346; EG-NEXT: CF_END 347; EG-NEXT: PAD 348; EG-NEXT: Fetch clause starting at 6: 349; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1 350; EG-NEXT: ALU clause starting at 8: 351; EG-NEXT: MOV * T0.X, KC0[2].Z, 352; EG-NEXT: ALU clause starting at 9: 353; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 354; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 355entry: 356 %ld = load <8 x i16>, <8 x i16> addrspace(4)* %in 357 store <8 x i16> %ld, <8 x i16> addrspace(1)* %out 358 ret void 359} 360 361define amdgpu_kernel void @constant_load_v16i16(<16 x i16> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) { 362; GCN-NOHSA-SI-LABEL: constant_load_v16i16: 363; GCN-NOHSA-SI: ; %bb.0: ; %entry 364; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9 365; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 366; GCN-NOHSA-SI-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0 367; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, 0xf000 368; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, -1 369; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 370; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 371; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s5 372; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s6 373; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s7 374; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16 375; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 376; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s0 377; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s1 378; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s2 379; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s3 380; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 381; GCN-NOHSA-SI-NEXT: s_endpgm 382; 383; 
GCN-HSA-LABEL: constant_load_v16i16: 384; GCN-HSA: ; %bb.0: ; %entry 385; GCN-HSA-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x0 386; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 387; GCN-HSA-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0 388; GCN-HSA-NEXT: s_add_u32 s10, s8, 16 389; GCN-HSA-NEXT: s_addc_u32 s11, s9, 0 390; GCN-HSA-NEXT: v_mov_b32_e32 v6, s10 391; GCN-HSA-NEXT: v_mov_b32_e32 v7, s11 392; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 393; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 394; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 395; GCN-HSA-NEXT: v_mov_b32_e32 v2, s6 396; GCN-HSA-NEXT: v_mov_b32_e32 v3, s7 397; GCN-HSA-NEXT: flat_store_dwordx4 v[6:7], v[0:3] 398; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 399; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 400; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 401; GCN-HSA-NEXT: v_mov_b32_e32 v6, s2 402; GCN-HSA-NEXT: v_mov_b32_e32 v7, s3 403; GCN-HSA-NEXT: v_mov_b32_e32 v1, s9 404; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[4:7] 405; GCN-HSA-NEXT: s_endpgm 406; 407; GCN-NOHSA-VI-LABEL: constant_load_v16i16: 408; GCN-NOHSA-VI: ; %bb.0: ; %entry 409; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x24 410; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 411; GCN-NOHSA-VI-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0 412; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, 0xf000 413; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, -1 414; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 415; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 416; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s5 417; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s6 418; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s7 419; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 420; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 421; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v6, s2 422; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v7, s3 423; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16 424; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[8:11], 0 425; GCN-NOHSA-VI-NEXT: s_endpgm 426; 427; EG-LABEL: constant_load_v16i16: 428; EG: ; %bb.0: ; %entry 429; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], 
KC1[] 430; EG-NEXT: TEX 0 @8 431; EG-NEXT: ALU 3, @13, KC0[CB0:0-32], KC1[] 432; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0 433; EG-NEXT: ALU 1, @17, KC0[CB0:0-32], KC1[] 434; EG-NEXT: TEX 0 @10 435; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1 436; EG-NEXT: CF_END 437; EG-NEXT: Fetch clause starting at 8: 438; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 16, #1 439; EG-NEXT: Fetch clause starting at 10: 440; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1 441; EG-NEXT: ALU clause starting at 12: 442; EG-NEXT: MOV * T0.X, KC0[2].Z, 443; EG-NEXT: ALU clause starting at 13: 444; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 445; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 446; EG-NEXT: LSHR * T2.X, PV.W, literal.x, 447; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 448; EG-NEXT: ALU clause starting at 17: 449; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 450; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 451entry: 452 %ld = load <16 x i16>, <16 x i16> addrspace(4)* %in 453 store <16 x i16> %ld, <16 x i16> addrspace(1)* %out 454 ret void 455} 456 457define amdgpu_kernel void @constant_load_v16i16_align2(<16 x i16> addrspace(4)* %ptr0) #0 { 458; GCN-NOHSA-SI-LABEL: constant_load_v16i16_align2: 459; GCN-NOHSA-SI: ; %bb.0: ; %entry 460; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 461; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 462; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 463; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 464; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 465; GCN-NOHSA-SI-NEXT: buffer_load_ushort v1, off, s[0:3], 0 offset:2 466; GCN-NOHSA-SI-NEXT: buffer_load_ushort v4, off, s[0:3], 0 offset:4 467; GCN-NOHSA-SI-NEXT: buffer_load_ushort v2, off, s[0:3], 0 offset:6 468; GCN-NOHSA-SI-NEXT: buffer_load_ushort v5, off, s[0:3], 0 offset:8 469; GCN-NOHSA-SI-NEXT: buffer_load_ushort v3, off, s[0:3], 0 offset:10 470; GCN-NOHSA-SI-NEXT: buffer_load_ushort v6, off, s[0:3], 0 offset:12 471; GCN-NOHSA-SI-NEXT: buffer_load_ushort v7, off, s[0:3], 0 offset:14 472; 
GCN-NOHSA-SI-NEXT: buffer_load_ushort v8, off, s[0:3], 0 offset:16 473; GCN-NOHSA-SI-NEXT: buffer_load_ushort v9, off, s[0:3], 0 offset:18 474; GCN-NOHSA-SI-NEXT: buffer_load_ushort v10, off, s[0:3], 0 offset:20 475; GCN-NOHSA-SI-NEXT: buffer_load_ushort v11, off, s[0:3], 0 offset:22 476; GCN-NOHSA-SI-NEXT: buffer_load_ushort v12, off, s[0:3], 0 offset:24 477; GCN-NOHSA-SI-NEXT: buffer_load_ushort v13, off, s[0:3], 0 offset:26 478; GCN-NOHSA-SI-NEXT: buffer_load_ushort v14, off, s[0:3], 0 offset:28 479; GCN-NOHSA-SI-NEXT: buffer_load_ushort v15, off, s[0:3], 0 offset:30 480; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(8) 481; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v7, 16, v7 482; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v16, 16, v3 483; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v17, 16, v2 484; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v18, 16, v1 485; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 486; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v15, 16, v15 487; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v13, 16, v13 488; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v11, 16, v11 489; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v9, 16, v9 490; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v3, v7, v6 491; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v2, v16, v5 492; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v1, v17, v4 493; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v0, v18, v0 494; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v7, v15, v14 495; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v6, v13, v12 496; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v5, v11, v10 497; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v4, v9, v8 498; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 499; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 500; GCN-NOHSA-SI-NEXT: s_endpgm 501; 502; GCN-HSA-LABEL: constant_load_v16i16_align2: 503; GCN-HSA: ; %bb.0: ; %entry 504; GCN-HSA-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 505; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 506; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 507; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 508; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 509; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 
510; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 511; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 512; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 513; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] 514; GCN-HSA-NEXT: s_waitcnt vmcnt(1) 515; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[0:3] 516; GCN-HSA-NEXT: s_waitcnt vmcnt(1) 517; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[4:7] 518; GCN-HSA-NEXT: s_endpgm 519; 520; GCN-NOHSA-VI-LABEL: constant_load_v16i16_align2: 521; GCN-NOHSA-VI: ; %bb.0: ; %entry 522; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 523; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 524; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 525; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 526; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:14 527; GCN-NOHSA-VI-NEXT: buffer_load_ushort v1, off, s[0:3], 0 offset:10 528; GCN-NOHSA-VI-NEXT: buffer_load_ushort v2, off, s[0:3], 0 offset:6 529; GCN-NOHSA-VI-NEXT: buffer_load_ushort v3, off, s[0:3], 0 offset:2 530; GCN-NOHSA-VI-NEXT: buffer_load_ushort v4, off, s[0:3], 0 offset:30 531; GCN-NOHSA-VI-NEXT: buffer_load_ushort v5, off, s[0:3], 0 offset:26 532; GCN-NOHSA-VI-NEXT: buffer_load_ushort v6, off, s[0:3], 0 offset:22 533; GCN-NOHSA-VI-NEXT: buffer_load_ushort v7, off, s[0:3], 0 offset:18 534; GCN-NOHSA-VI-NEXT: buffer_load_ushort v8, off, s[0:3], 0 offset:12 535; GCN-NOHSA-VI-NEXT: buffer_load_ushort v9, off, s[0:3], 0 offset:8 536; GCN-NOHSA-VI-NEXT: buffer_load_ushort v10, off, s[0:3], 0 offset:4 537; GCN-NOHSA-VI-NEXT: buffer_load_ushort v11, off, s[0:3], 0 538; GCN-NOHSA-VI-NEXT: buffer_load_ushort v12, off, s[0:3], 0 offset:28 539; GCN-NOHSA-VI-NEXT: buffer_load_ushort v13, off, s[0:3], 0 offset:24 540; GCN-NOHSA-VI-NEXT: buffer_load_ushort v14, off, s[0:3], 0 offset:20 541; GCN-NOHSA-VI-NEXT: buffer_load_ushort v15, off, s[0:3], 0 offset:16 542; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(14) 543; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 544; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 545; GCN-NOHSA-VI-NEXT: 
s_waitcnt vmcnt(13) 546; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v16, 16, v2 547; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(12) 548; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v17, 16, v3 549; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(11) 550; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v4, 16, v4 551; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(10) 552; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v5, 16, v5 553; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(9) 554; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v18, 16, v6 555; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(8) 556; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v19, 16, v7 557; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(7) 558; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v3, v8, v0 559; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(6) 560; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v2, v9, v1 561; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(5) 562; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v1, v10, v16 563; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(4) 564; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v0, v11, v17 565; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(3) 566; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v7, v12, v4 567; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(2) 568; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v6, v13, v5 569; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1) 570; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v5, v14, v18 571; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 572; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v4, v15, v19 573; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 574; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 575; GCN-NOHSA-VI-NEXT: s_endpgm 576; 577; EG-LABEL: constant_load_v16i16_align2: 578; EG: ; %bb.0: ; %entry 579; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 580; EG-NEXT: TEX 0 @8 581; EG-NEXT: ALU 1, @13, KC0[], KC1[] 582; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0 583; EG-NEXT: TEX 0 @10 584; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T2.X, 1 585; EG-NEXT: CF_END 586; EG-NEXT: PAD 587; EG-NEXT: Fetch clause starting at 8: 588; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 16, #1 589; EG-NEXT: Fetch clause starting at 10: 590; EG-NEXT: VTX_READ_128 
T0.XYZW, T0.X, 0, #1 591; EG-NEXT: ALU clause starting at 12: 592; EG-NEXT: MOV * T0.X, KC0[2].Y, 593; EG-NEXT: ALU clause starting at 13: 594; EG-NEXT: MOV * T2.X, literal.x, 595; EG-NEXT: 0(0.000000e+00), 0(0.000000e+00) 596entry: 597 %ld = load <16 x i16>, <16 x i16> addrspace(4)* %ptr0, align 2 598 store <16 x i16> %ld, <16 x i16> addrspace(1)* undef, align 32 599 ret void 600} 601 602define amdgpu_kernel void @constant_zextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 { 603; GCN-NOHSA-SI-LABEL: constant_zextload_i16_to_i32: 604; GCN-NOHSA-SI: ; %bb.0: 605; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 606; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 607; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 608; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 609; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 610; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 611; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 612; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 613; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 614; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 615; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 616; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 617; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 618; GCN-NOHSA-SI-NEXT: s_endpgm 619; 620; GCN-HSA-LABEL: constant_zextload_i16_to_i32: 621; GCN-HSA: ; %bb.0: 622; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 623; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 624; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 625; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 626; GCN-HSA-NEXT: flat_load_ushort v2, v[0:1] 627; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 628; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 629; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 630; GCN-HSA-NEXT: flat_store_dword v[0:1], v2 631; GCN-HSA-NEXT: s_endpgm 632; 633; GCN-NOHSA-VI-LABEL: constant_zextload_i16_to_i32: 634; GCN-NOHSA-VI: ; %bb.0: 635; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 636; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 637; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 638; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 639; 
GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 640; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 641; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 642; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 643; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 644; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 645; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 646; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 647; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[4:7], 0 648; GCN-NOHSA-VI-NEXT: s_endpgm 649; 650; EG-LABEL: constant_zextload_i16_to_i32: 651; EG: ; %bb.0: 652; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 653; EG-NEXT: TEX 0 @6 654; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 655; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 656; EG-NEXT: CF_END 657; EG-NEXT: PAD 658; EG-NEXT: Fetch clause starting at 6: 659; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 660; EG-NEXT: ALU clause starting at 8: 661; EG-NEXT: MOV * T0.X, KC0[2].Z, 662; EG-NEXT: ALU clause starting at 9: 663; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 664; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 665 %a = load i16, i16 addrspace(4)* %in 666 %ext = zext i16 %a to i32 667 store i32 %ext, i32 addrspace(1)* %out 668 ret void 669} 670 671define amdgpu_kernel void @constant_sextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 { 672; GCN-NOHSA-SI-LABEL: constant_sextload_i16_to_i32: 673; GCN-NOHSA-SI: ; %bb.0: 674; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 675; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 676; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 677; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 678; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 679; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 680; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 681; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 682; GCN-NOHSA-SI-NEXT: buffer_load_sshort v0, off, s[8:11], 0 683; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 684; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 685; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 686; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 687; GCN-NOHSA-SI-NEXT: s_endpgm 688; 
689; GCN-HSA-LABEL: constant_sextload_i16_to_i32: 690; GCN-HSA: ; %bb.0: 691; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 692; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 693; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 694; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 695; GCN-HSA-NEXT: flat_load_sshort v2, v[0:1] 696; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 697; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 698; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 699; GCN-HSA-NEXT: flat_store_dword v[0:1], v2 700; GCN-HSA-NEXT: s_endpgm 701; 702; GCN-NOHSA-VI-LABEL: constant_sextload_i16_to_i32: 703; GCN-NOHSA-VI: ; %bb.0: 704; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 705; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 706; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 707; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 708; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 709; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 710; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 711; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 712; GCN-NOHSA-VI-NEXT: buffer_load_sshort v0, off, s[8:11], 0 713; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 714; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 715; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 716; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[4:7], 0 717; GCN-NOHSA-VI-NEXT: s_endpgm 718; 719; EG-LABEL: constant_sextload_i16_to_i32: 720; EG: ; %bb.0: 721; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 722; EG-NEXT: TEX 0 @6 723; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] 724; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 725; EG-NEXT: CF_END 726; EG-NEXT: PAD 727; EG-NEXT: Fetch clause starting at 6: 728; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 729; EG-NEXT: ALU clause starting at 8: 730; EG-NEXT: MOV * T0.X, KC0[2].Z, 731; EG-NEXT: ALU clause starting at 9: 732; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x, 733; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 734; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 735 %a = load i16, i16 addrspace(4)* %in 736 %ext = sext i16 %a to i32 737 store i32 %ext, i32 addrspace(1)* %out 738 ret void 739} 740 741define 
amdgpu_kernel void @constant_zextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 { 742; GCN-NOHSA-SI-LABEL: constant_zextload_v1i16_to_v1i32: 743; GCN-NOHSA-SI: ; %bb.0: 744; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 745; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 746; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 747; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 748; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 749; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 750; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 751; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 752; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 753; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 754; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 755; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 756; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 757; GCN-NOHSA-SI-NEXT: s_endpgm 758; 759; GCN-HSA-LABEL: constant_zextload_v1i16_to_v1i32: 760; GCN-HSA: ; %bb.0: 761; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 762; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 763; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 764; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 765; GCN-HSA-NEXT: flat_load_ushort v2, v[0:1] 766; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 767; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 768; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 769; GCN-HSA-NEXT: flat_store_dword v[0:1], v2 770; GCN-HSA-NEXT: s_endpgm 771; 772; GCN-NOHSA-VI-LABEL: constant_zextload_v1i16_to_v1i32: 773; GCN-NOHSA-VI: ; %bb.0: 774; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 775; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 776; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 777; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 778; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 779; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 780; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 781; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 782; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 783; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 784; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 785; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 786; GCN-NOHSA-VI-NEXT: 
buffer_store_dword v0, off, s[4:7], 0 787; GCN-NOHSA-VI-NEXT: s_endpgm 788; 789; EG-LABEL: constant_zextload_v1i16_to_v1i32: 790; EG: ; %bb.0: 791; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 792; EG-NEXT: TEX 0 @6 793; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 794; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 795; EG-NEXT: CF_END 796; EG-NEXT: PAD 797; EG-NEXT: Fetch clause starting at 6: 798; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 799; EG-NEXT: ALU clause starting at 8: 800; EG-NEXT: MOV * T0.X, KC0[2].Z, 801; EG-NEXT: ALU clause starting at 9: 802; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 803; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 804 %load = load <1 x i16>, <1 x i16> addrspace(4)* %in 805 %ext = zext <1 x i16> %load to <1 x i32> 806 store <1 x i32> %ext, <1 x i32> addrspace(1)* %out 807 ret void 808} 809 810define amdgpu_kernel void @constant_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 { 811; GCN-NOHSA-SI-LABEL: constant_sextload_v1i16_to_v1i32: 812; GCN-NOHSA-SI: ; %bb.0: 813; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 814; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 815; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 816; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 817; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 818; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 819; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 820; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 821; GCN-NOHSA-SI-NEXT: buffer_load_sshort v0, off, s[8:11], 0 822; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 823; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 824; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 825; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 826; GCN-NOHSA-SI-NEXT: s_endpgm 827; 828; GCN-HSA-LABEL: constant_sextload_v1i16_to_v1i32: 829; GCN-HSA: ; %bb.0: 830; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 831; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 832; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 833; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 834; GCN-HSA-NEXT: flat_load_sshort v2, v[0:1] 835; 
GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 836; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 837; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 838; GCN-HSA-NEXT: flat_store_dword v[0:1], v2 839; GCN-HSA-NEXT: s_endpgm 840; 841; GCN-NOHSA-VI-LABEL: constant_sextload_v1i16_to_v1i32: 842; GCN-NOHSA-VI: ; %bb.0: 843; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 844; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 845; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 846; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 847; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 848; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 849; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 850; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 851; GCN-NOHSA-VI-NEXT: buffer_load_sshort v0, off, s[8:11], 0 852; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 853; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 854; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 855; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[4:7], 0 856; GCN-NOHSA-VI-NEXT: s_endpgm 857; 858; EG-LABEL: constant_sextload_v1i16_to_v1i32: 859; EG: ; %bb.0: 860; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 861; EG-NEXT: TEX 0 @6 862; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] 863; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 864; EG-NEXT: CF_END 865; EG-NEXT: PAD 866; EG-NEXT: Fetch clause starting at 6: 867; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 868; EG-NEXT: ALU clause starting at 8: 869; EG-NEXT: MOV * T0.X, KC0[2].Z, 870; EG-NEXT: ALU clause starting at 9: 871; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x, 872; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 873; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 874 %load = load <1 x i16>, <1 x i16> addrspace(4)* %in 875 %ext = sext <1 x i16> %load to <1 x i32> 876 store <1 x i32> %ext, <1 x i32> addrspace(1)* %out 877 ret void 878} 879 880define amdgpu_kernel void @constant_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 { 881; GCN-NOHSA-SI-LABEL: constant_zextload_v2i16_to_v2i32: 882; GCN-NOHSA-SI: ; %bb.0: 883; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 
0x9 884; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 885; GCN-NOHSA-SI-NEXT: s_load_dword s2, s[2:3], 0x0 886; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 887; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 888; GCN-NOHSA-SI-NEXT: s_lshr_b32 s4, s2, 16 889; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s2, 0xffff 890; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 891; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5 892; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s4 893; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 894; GCN-NOHSA-SI-NEXT: s_endpgm 895; 896; GCN-HSA-LABEL: constant_zextload_v2i16_to_v2i32: 897; GCN-HSA: ; %bb.0: 898; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 899; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 900; GCN-HSA-NEXT: s_load_dword s2, s[2:3], 0x0 901; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 902; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 903; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 904; GCN-HSA-NEXT: s_lshr_b32 s0, s2, 16 905; GCN-HSA-NEXT: s_and_b32 s1, s2, 0xffff 906; GCN-HSA-NEXT: v_mov_b32_e32 v2, s1 907; GCN-HSA-NEXT: v_mov_b32_e32 v3, s0 908; GCN-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 909; GCN-HSA-NEXT: s_endpgm 910; 911; GCN-NOHSA-VI-LABEL: constant_zextload_v2i16_to_v2i32: 912; GCN-NOHSA-VI: ; %bb.0: 913; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 914; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 915; GCN-NOHSA-VI-NEXT: s_load_dword s4, s[2:3], 0x0 916; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 917; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 918; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 919; GCN-NOHSA-VI-NEXT: s_lshr_b32 s5, s4, 16 920; GCN-NOHSA-VI-NEXT: s_and_b32 s4, s4, 0xffff 921; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 922; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s5 923; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 924; GCN-NOHSA-VI-NEXT: s_endpgm 925; 926; EG-LABEL: constant_zextload_v2i16_to_v2i32: 927; EG: ; %bb.0: 928; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 929; EG-NEXT: TEX 0 @6 930; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[] 931; EG-NEXT: MEM_RAT_CACHELESS 
STORE_RAW T4.XY, T5.X, 1 932; EG-NEXT: CF_END 933; EG-NEXT: PAD 934; EG-NEXT: Fetch clause starting at 6: 935; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 936; EG-NEXT: ALU clause starting at 8: 937; EG-NEXT: MOV * T4.X, KC0[2].Z, 938; EG-NEXT: ALU clause starting at 9: 939; EG-NEXT: LSHR * T4.Y, T4.X, literal.x, 940; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 941; EG-NEXT: AND_INT T4.X, T4.X, literal.x, 942; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y, 943; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 944 %load = load <2 x i16>, <2 x i16> addrspace(4)* %in 945 %ext = zext <2 x i16> %load to <2 x i32> 946 store <2 x i32> %ext, <2 x i32> addrspace(1)* %out 947 ret void 948} 949 950; TODO: We should use ASHR instead of LSHR + BFE 951define amdgpu_kernel void @constant_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 { 952; GCN-NOHSA-SI-LABEL: constant_sextload_v2i16_to_v2i32: 953; GCN-NOHSA-SI: ; %bb.0: 954; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 955; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 956; GCN-NOHSA-SI-NEXT: s_load_dword s2, s[2:3], 0x0 957; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 958; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 959; GCN-NOHSA-SI-NEXT: s_ashr_i32 s4, s2, 16 960; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s5, s2 961; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 962; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5 963; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s4 964; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 965; GCN-NOHSA-SI-NEXT: s_endpgm 966; 967; GCN-HSA-LABEL: constant_sextload_v2i16_to_v2i32: 968; GCN-HSA: ; %bb.0: 969; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 970; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 971; GCN-HSA-NEXT: s_load_dword s2, s[2:3], 0x0 972; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 973; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 974; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 975; GCN-HSA-NEXT: s_ashr_i32 s0, s2, 16 976; GCN-HSA-NEXT: s_sext_i32_i16 s1, s2 977; GCN-HSA-NEXT: v_mov_b32_e32 v2, s1 978; 
GCN-HSA-NEXT: v_mov_b32_e32 v3, s0 979; GCN-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 980; GCN-HSA-NEXT: s_endpgm 981; 982; GCN-NOHSA-VI-LABEL: constant_sextload_v2i16_to_v2i32: 983; GCN-NOHSA-VI: ; %bb.0: 984; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 985; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 986; GCN-NOHSA-VI-NEXT: s_load_dword s4, s[2:3], 0x0 987; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 988; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 989; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 990; GCN-NOHSA-VI-NEXT: s_ashr_i32 s5, s4, 16 991; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s4, s4 992; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 993; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s5 994; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 995; GCN-NOHSA-VI-NEXT: s_endpgm 996; 997; EG-LABEL: constant_sextload_v2i16_to_v2i32: 998; EG: ; %bb.0: 999; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1000; EG-NEXT: TEX 0 @6 1001; EG-NEXT: ALU 5, @9, KC0[CB0:0-32], KC1[] 1002; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XY, T4.X, 1 1003; EG-NEXT: CF_END 1004; EG-NEXT: PAD 1005; EG-NEXT: Fetch clause starting at 6: 1006; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 1007; EG-NEXT: ALU clause starting at 8: 1008; EG-NEXT: MOV * T4.X, KC0[2].Z, 1009; EG-NEXT: ALU clause starting at 9: 1010; EG-NEXT: BFE_INT T5.X, T4.X, 0.0, literal.x, 1011; EG-NEXT: LSHR T0.W, T4.X, literal.x, 1012; EG-NEXT: LSHR * T4.X, KC0[2].Y, literal.y, 1013; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 1014; EG-NEXT: BFE_INT * T5.Y, PV.W, 0.0, literal.x, 1015; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1016 %load = load <2 x i16>, <2 x i16> addrspace(4)* %in 1017 %ext = sext <2 x i16> %load to <2 x i32> 1018 store <2 x i32> %ext, <2 x i32> addrspace(1)* %out 1019 ret void 1020} 1021 1022define amdgpu_kernel void @constant_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(4)* %in) { 1023; GCN-NOHSA-SI-LABEL: constant_zextload_v3i16_to_v3i32: 1024; GCN-NOHSA-SI: ; %bb.0: ; %entry 1025; 
GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1026; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1027; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 1028; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 1029; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 1030; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1031; GCN-NOHSA-SI-NEXT: s_lshr_b32 s6, s4, 16 1032; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, 0xffff 1033; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, 0xffff 1034; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5 1035; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8 1036; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 1037; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 1038; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s6 1039; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1040; GCN-NOHSA-SI-NEXT: s_endpgm 1041; 1042; GCN-HSA-LABEL: constant_zextload_v3i16_to_v3i32: 1043; GCN-HSA: ; %bb.0: ; %entry 1044; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1045; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1046; GCN-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 1047; GCN-HSA-NEXT: v_mov_b32_e32 v3, s0 1048; GCN-HSA-NEXT: v_mov_b32_e32 v4, s1 1049; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1050; GCN-HSA-NEXT: s_lshr_b32 s0, s2, 16 1051; GCN-HSA-NEXT: s_and_b32 s1, s3, 0xffff 1052; GCN-HSA-NEXT: s_and_b32 s2, s2, 0xffff 1053; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1054; GCN-HSA-NEXT: v_mov_b32_e32 v1, s0 1055; GCN-HSA-NEXT: v_mov_b32_e32 v2, s1 1056; GCN-HSA-NEXT: flat_store_dwordx3 v[3:4], v[0:2] 1057; GCN-HSA-NEXT: s_endpgm 1058; 1059; GCN-NOHSA-VI-LABEL: constant_zextload_v3i16_to_v3i32: 1060; GCN-NOHSA-VI: ; %bb.0: ; %entry 1061; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 1062; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1063; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 1064; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 1065; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 1066; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1067; GCN-NOHSA-VI-NEXT: s_and_b32 s5, s5, 0xffff 1068; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, 
s4, 16 1069; GCN-NOHSA-VI-NEXT: s_and_b32 s4, s4, 0xffff 1070; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 1071; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s6 1072; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 1073; GCN-NOHSA-VI-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0 1074; GCN-NOHSA-VI-NEXT: s_endpgm 1075; 1076; EG-LABEL: constant_zextload_v3i16_to_v3i32: 1077; EG: ; %bb.0: ; %entry 1078; EG-NEXT: ALU 4, @12, KC0[CB0:0-32], KC1[] 1079; EG-NEXT: TEX 2 @6 1080; EG-NEXT: ALU 2, @17, KC0[], KC1[] 1081; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T4.X, 0 1082; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T3.XY, T0.X, 1 1083; EG-NEXT: CF_END 1084; EG-NEXT: Fetch clause starting at 6: 1085; EG-NEXT: VTX_READ_16 T2.X, T1.X, 4, #1 1086; EG-NEXT: VTX_READ_16 T3.X, T1.X, 0, #1 1087; EG-NEXT: VTX_READ_16 T1.X, T1.X, 2, #1 1088; EG-NEXT: ALU clause starting at 12: 1089; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x, 1090; EG-NEXT: MOV * T1.X, KC0[2].Z, 1091; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1092; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 1093; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 1094; EG-NEXT: ALU clause starting at 17: 1095; EG-NEXT: LSHR T4.X, T0.W, literal.x, 1096; EG-NEXT: MOV * T3.Y, T1.X, 1097; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1098entry: 1099 %ld = load <3 x i16>, <3 x i16> addrspace(4)* %in 1100 %ext = zext <3 x i16> %ld to <3 x i32> 1101 store <3 x i32> %ext, <3 x i32> addrspace(1)* %out 1102 ret void 1103} 1104 1105define amdgpu_kernel void @constant_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(4)* %in) { 1106; GCN-NOHSA-SI-LABEL: constant_sextload_v3i16_to_v3i32: 1107; GCN-NOHSA-SI: ; %bb.0: ; %entry 1108; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1109; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1110; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 1111; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 1112; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 1113; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1114; GCN-NOHSA-SI-NEXT: s_ashr_i32 
s6, s4, 16 1115; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s5, s5 1116; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s4, s4 1117; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5 1118; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8 1119; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 1120; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 1121; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s6 1122; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1123; GCN-NOHSA-SI-NEXT: s_endpgm 1124; 1125; GCN-HSA-LABEL: constant_sextload_v3i16_to_v3i32: 1126; GCN-HSA: ; %bb.0: ; %entry 1127; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1128; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1129; GCN-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 1130; GCN-HSA-NEXT: v_mov_b32_e32 v3, s0 1131; GCN-HSA-NEXT: v_mov_b32_e32 v4, s1 1132; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1133; GCN-HSA-NEXT: s_ashr_i32 s0, s2, 16 1134; GCN-HSA-NEXT: s_sext_i32_i16 s1, s3 1135; GCN-HSA-NEXT: s_sext_i32_i16 s2, s2 1136; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1137; GCN-HSA-NEXT: v_mov_b32_e32 v1, s0 1138; GCN-HSA-NEXT: v_mov_b32_e32 v2, s1 1139; GCN-HSA-NEXT: flat_store_dwordx3 v[3:4], v[0:2] 1140; GCN-HSA-NEXT: s_endpgm 1141; 1142; GCN-NOHSA-VI-LABEL: constant_sextload_v3i16_to_v3i32: 1143; GCN-NOHSA-VI: ; %bb.0: ; %entry 1144; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 1145; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1146; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 1147; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 1148; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 1149; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1150; GCN-NOHSA-VI-NEXT: s_ashr_i32 s6, s4, 16 1151; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s5, s5 1152; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s4, s4 1153; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 1154; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s6 1155; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 1156; GCN-NOHSA-VI-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0 1157; GCN-NOHSA-VI-NEXT: s_endpgm 1158; 1159; EG-LABEL: 
constant_sextload_v3i16_to_v3i32: 1160; EG: ; %bb.0: ; %entry 1161; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 1162; EG-NEXT: TEX 2 @6 1163; EG-NEXT: ALU 9, @13, KC0[CB0:0-32], KC1[] 1164; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0 1165; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1166; EG-NEXT: CF_END 1167; EG-NEXT: Fetch clause starting at 6: 1168; EG-NEXT: VTX_READ_16 T1.X, T0.X, 2, #1 1169; EG-NEXT: VTX_READ_16 T2.X, T0.X, 4, #1 1170; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 1171; EG-NEXT: ALU clause starting at 12: 1172; EG-NEXT: MOV * T0.X, KC0[2].Z, 1173; EG-NEXT: ALU clause starting at 13: 1174; EG-NEXT: BFE_INT * T0.Y, T1.X, 0.0, literal.x, 1175; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1176; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x, 1177; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 1178; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 1179; EG-NEXT: BFE_INT T2.X, T2.X, 0.0, literal.x, 1180; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 1181; EG-NEXT: 16(2.242078e-44), 8(1.121039e-44) 1182; EG-NEXT: LSHR * T3.X, PV.W, literal.x, 1183; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1184entry: 1185 %ld = load <3 x i16>, <3 x i16> addrspace(4)* %in 1186 %ext = sext <3 x i16> %ld to <3 x i32> 1187 store <3 x i32> %ext, <3 x i32> addrspace(1)* %out 1188 ret void 1189} 1190 1191; v4i16 is naturally 8 byte aligned 1192; TODO: This should use LD, but for some reason there are redundant MOVs 1193define amdgpu_kernel void @constant_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 { 1194; GCN-NOHSA-SI-LABEL: constant_zextload_v4i16_to_v4i32: 1195; GCN-NOHSA-SI: ; %bb.0: 1196; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1197; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1198; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 1199; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 1200; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1201; GCN-NOHSA-SI-NEXT: s_lshr_b32 s6, s5, 16 1202; GCN-NOHSA-SI-NEXT: s_lshr_b32 s7, s4, 16 1203;
GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, 0xffff 1204; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, 0xffff 1205; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 1206; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 1207; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s7 1208; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 1209; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s6 1210; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1211; GCN-NOHSA-SI-NEXT: s_endpgm 1212; 1213; GCN-HSA-LABEL: constant_zextload_v4i16_to_v4i32: 1214; GCN-HSA: ; %bb.0: 1215; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1216; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1217; GCN-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 1218; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 1219; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 1220; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1221; GCN-HSA-NEXT: s_lshr_b32 s0, s3, 16 1222; GCN-HSA-NEXT: s_lshr_b32 s1, s2, 16 1223; GCN-HSA-NEXT: s_and_b32 s3, s3, 0xffff 1224; GCN-HSA-NEXT: s_and_b32 s2, s2, 0xffff 1225; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1226; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 1227; GCN-HSA-NEXT: v_mov_b32_e32 v2, s3 1228; GCN-HSA-NEXT: v_mov_b32_e32 v3, s0 1229; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1230; GCN-HSA-NEXT: s_endpgm 1231; 1232; GCN-NOHSA-VI-LABEL: constant_zextload_v4i16_to_v4i32: 1233; GCN-NOHSA-VI: ; %bb.0: 1234; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 1235; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1236; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 1237; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 1238; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 1239; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1240; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s5, 16 1241; GCN-NOHSA-VI-NEXT: s_and_b32 s5, s5, 0xffff 1242; GCN-NOHSA-VI-NEXT: s_lshr_b32 s7, s4, 16 1243; GCN-NOHSA-VI-NEXT: s_and_b32 s4, s4, 0xffff 1244; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 1245; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s7 1246; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 1247; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s6 1248; 
GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1249; GCN-NOHSA-VI-NEXT: s_endpgm 1250; 1251; EG-LABEL: constant_zextload_v4i16_to_v4i32: 1252; EG: ; %bb.0: 1253; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1254; EG-NEXT: TEX 0 @6 1255; EG-NEXT: ALU 12, @9, KC0[CB0:0-32], KC1[] 1256; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1 1257; EG-NEXT: CF_END 1258; EG-NEXT: PAD 1259; EG-NEXT: Fetch clause starting at 6: 1260; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 1261; EG-NEXT: ALU clause starting at 8: 1262; EG-NEXT: MOV * T5.X, KC0[2].Z, 1263; EG-NEXT: ALU clause starting at 9: 1264; EG-NEXT: MOV T2.X, T5.X, 1265; EG-NEXT: MOV * T3.X, T5.Y, 1266; EG-NEXT: MOV T0.Y, PV.X, 1267; EG-NEXT: MOV * T0.Z, PS, 1268; EG-NEXT: LSHR * T5.W, PV.Z, literal.x, 1269; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1270; EG-NEXT: AND_INT * T5.Z, T0.Z, literal.x, 1271; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 1272; EG-NEXT: LSHR * T5.Y, T0.Y, literal.x, 1273; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1274; EG-NEXT: AND_INT T5.X, T0.Y, literal.x, 1275; EG-NEXT: LSHR * T6.X, KC0[2].Y, literal.y, 1276; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 1277 %load = load <4 x i16>, <4 x i16> addrspace(4)* %in 1278 %ext = zext <4 x i16> %load to <4 x i32> 1279 store <4 x i32> %ext, <4 x i32> addrspace(1)* %out 1280 ret void 1281} 1282 1283; v4i16 is naturally 8 byte aligned 1284; TODO: This should use LD, but for some reason there are redundant MOVs 1285; TODO: We should use ASHR instead of LSHR + BFE 1286define amdgpu_kernel void @constant_sextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 { 1287; GCN-NOHSA-SI-LABEL: constant_sextload_v4i16_to_v4i32: 1288; GCN-NOHSA-SI: ; %bb.0: 1289; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1290; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1291; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 1292; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 1293; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1294;
GCN-NOHSA-SI-NEXT: s_ashr_i32 s8, s4, 16 1295; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[6:7], s[4:5], 48 1296; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s5, s5 1297; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s4, s4 1298; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 1299; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 1300; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s8 1301; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 1302; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s6 1303; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1304; GCN-NOHSA-SI-NEXT: s_endpgm 1305; 1306; GCN-HSA-LABEL: constant_sextload_v4i16_to_v4i32: 1307; GCN-HSA: ; %bb.0: 1308; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1309; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1310; GCN-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 1311; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 1312; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 1313; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1314; GCN-HSA-NEXT: s_ashr_i64 s[0:1], s[2:3], 48 1315; GCN-HSA-NEXT: s_ashr_i32 s4, s2, 16 1316; GCN-HSA-NEXT: s_sext_i32_i16 s1, s3 1317; GCN-HSA-NEXT: s_sext_i32_i16 s2, s2 1318; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1319; GCN-HSA-NEXT: v_mov_b32_e32 v1, s4 1320; GCN-HSA-NEXT: v_mov_b32_e32 v2, s1 1321; GCN-HSA-NEXT: v_mov_b32_e32 v3, s0 1322; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1323; GCN-HSA-NEXT: s_endpgm 1324; 1325; GCN-NOHSA-VI-LABEL: constant_sextload_v4i16_to_v4i32: 1326; GCN-NOHSA-VI: ; %bb.0: 1327; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 1328; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1329; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 1330; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 1331; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 1332; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1333; GCN-NOHSA-VI-NEXT: s_ashr_i32 s6, s5, 16 1334; GCN-NOHSA-VI-NEXT: s_ashr_i32 s7, s4, 16 1335; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s5, s5 1336; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s4, s4 1337; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 1338; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s7 1339; 
GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 1340; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s6 1341; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1342; GCN-NOHSA-VI-NEXT: s_endpgm 1343; 1344; EG-LABEL: constant_sextload_v4i16_to_v4i32: 1345; EG: ; %bb.0: 1346; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1347; EG-NEXT: TEX 0 @6 1348; EG-NEXT: ALU 14, @9, KC0[CB0:0-32], KC1[] 1349; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1 1350; EG-NEXT: CF_END 1351; EG-NEXT: PAD 1352; EG-NEXT: Fetch clause starting at 6: 1353; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 1354; EG-NEXT: ALU clause starting at 8: 1355; EG-NEXT: MOV * T5.X, KC0[2].Z, 1356; EG-NEXT: ALU clause starting at 9: 1357; EG-NEXT: MOV T2.X, T5.X, 1358; EG-NEXT: MOV * T3.X, T5.Y, 1359; EG-NEXT: MOV T0.Y, PV.X, 1360; EG-NEXT: MOV * T0.Z, PS, 1361; EG-NEXT: BFE_INT * T5.Z, PV.Z, 0.0, literal.x, 1362; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1363; EG-NEXT: BFE_INT T5.X, T0.Y, 0.0, literal.x, 1364; EG-NEXT: LSHR * T0.W, T0.Z, literal.x, 1365; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1366; EG-NEXT: BFE_INT T5.W, PV.W, 0.0, literal.x, 1367; EG-NEXT: LSHR * T0.W, T0.Y, literal.x, 1368; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1369; EG-NEXT: LSHR T6.X, KC0[2].Y, literal.x, 1370; EG-NEXT: BFE_INT * T5.Y, PS, 0.0, literal.y, 1371; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 1372 %load = load <4 x i16>, <4 x i16> addrspace(4)* %in 1373 %ext = sext <4 x i16> %load to <4 x i32> 1374 store <4 x i32> %ext, <4 x i32> addrspace(1)* %out 1375 ret void 1376} 1377 1378; v8i16 is naturally 16 byte aligned 1379; TODO: These should use LSHR instead of BFE_UINT 1380; TODO: This should use DST, but for some reason there are redundant MOVs 1381define amdgpu_kernel void @constant_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 { 1382; GCN-NOHSA-SI-LABEL: constant_zextload_v8i16_to_v8i32: 1383; GCN-NOHSA-SI: ; %bb.0: 1384; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1385;
GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1386; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 1387; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 1388; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 1389; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1390; GCN-NOHSA-SI-NEXT: s_lshr_b32 s8, s5, 16 1391; GCN-NOHSA-SI-NEXT: s_lshr_b32 s9, s4, 16 1392; GCN-NOHSA-SI-NEXT: s_lshr_b32 s10, s7, 16 1393; GCN-NOHSA-SI-NEXT: s_lshr_b32 s11, s6, 16 1394; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, 0xffff 1395; GCN-NOHSA-SI-NEXT: s_and_b32 s7, s7, 0xffff 1396; GCN-NOHSA-SI-NEXT: s_and_b32 s6, s6, 0xffff 1397; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, 0xffff 1398; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 1399; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s11 1400; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7 1401; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s10 1402; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 1403; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 1404; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 1405; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s9 1406; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 1407; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s8 1408; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1409; GCN-NOHSA-SI-NEXT: s_endpgm 1410; 1411; GCN-HSA-LABEL: constant_zextload_v8i16_to_v8i32: 1412; GCN-HSA: ; %bb.0: 1413; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1414; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1415; GCN-HSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 1416; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1417; GCN-HSA-NEXT: s_lshr_b32 s8, s5, 16 1418; GCN-HSA-NEXT: s_lshr_b32 s9, s4, 16 1419; GCN-HSA-NEXT: s_lshr_b32 s2, s7, 16 1420; GCN-HSA-NEXT: s_lshr_b32 s3, s6, 16 1421; GCN-HSA-NEXT: s_and_b32 s5, s5, 0xffff 1422; GCN-HSA-NEXT: s_and_b32 s4, s4, 0xffff 1423; GCN-HSA-NEXT: s_and_b32 s7, s7, 0xffff 1424; GCN-HSA-NEXT: s_and_b32 s6, s6, 0xffff 1425; GCN-HSA-NEXT: v_mov_b32_e32 v3, s2 1426; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 1427; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1428; GCN-HSA-NEXT: s_addc_u32 
s3, s1, 0 1429; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 1430; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 1431; GCN-HSA-NEXT: v_mov_b32_e32 v2, s7 1432; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 1433; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1434; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 1435; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 1436; GCN-HSA-NEXT: v_mov_b32_e32 v1, s9 1437; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5 1438; GCN-HSA-NEXT: v_mov_b32_e32 v3, s8 1439; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 1440; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1441; GCN-HSA-NEXT: s_endpgm 1442; 1443; GCN-NOHSA-VI-LABEL: constant_zextload_v8i16_to_v8i32: 1444; GCN-NOHSA-VI: ; %bb.0: 1445; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 1446; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1447; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 1448; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 1449; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 1450; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1451; GCN-NOHSA-VI-NEXT: s_lshr_b32 s10, s7, 16 1452; GCN-NOHSA-VI-NEXT: s_and_b32 s7, s7, 0xffff 1453; GCN-NOHSA-VI-NEXT: s_lshr_b32 s11, s6, 16 1454; GCN-NOHSA-VI-NEXT: s_and_b32 s6, s6, 0xffff 1455; GCN-NOHSA-VI-NEXT: s_lshr_b32 s8, s5, 16 1456; GCN-NOHSA-VI-NEXT: s_and_b32 s5, s5, 0xffff 1457; GCN-NOHSA-VI-NEXT: s_lshr_b32 s9, s4, 16 1458; GCN-NOHSA-VI-NEXT: s_and_b32 s4, s4, 0xffff 1459; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 1460; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s11 1461; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7 1462; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s10 1463; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 1464; GCN-NOHSA-VI-NEXT: s_nop 0 1465; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 1466; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s9 1467; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 1468; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s8 1469; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1470; GCN-NOHSA-VI-NEXT: s_endpgm 1471; 1472; EG-LABEL: constant_zextload_v8i16_to_v8i32: 1473; EG: ; 
%bb.0: 1474; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1475; EG-NEXT: TEX 0 @6 1476; EG-NEXT: ALU 17, @9, KC0[CB0:0-32], KC1[] 1477; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0 1478; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1 1479; EG-NEXT: CF_END 1480; EG-NEXT: Fetch clause starting at 6: 1481; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 1482; EG-NEXT: ALU clause starting at 8: 1483; EG-NEXT: MOV * T7.X, KC0[2].Z, 1484; EG-NEXT: ALU clause starting at 9: 1485; EG-NEXT: LSHR * T8.W, T7.Y, literal.x, 1486; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1487; EG-NEXT: AND_INT * T8.Z, T7.Y, literal.x, 1488; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 1489; EG-NEXT: LSHR T8.Y, T7.X, literal.x, 1490; EG-NEXT: LSHR * T9.W, T7.W, literal.x, 1491; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1492; EG-NEXT: AND_INT T8.X, T7.X, literal.x, 1493; EG-NEXT: AND_INT T9.Z, T7.W, literal.x, 1494; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.y, 1495; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 1496; EG-NEXT: LSHR * T9.Y, T7.Z, literal.x, 1497; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1498; EG-NEXT: AND_INT T9.X, T7.Z, literal.x, 1499; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 1500; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 1501; EG-NEXT: LSHR * T10.X, PV.W, literal.x, 1502; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1503 %load = load <8 x i16>, <8 x i16> addrspace(4)* %in 1504 %ext = zext <8 x i16> %load to <8 x i32> 1505 store <8 x i32> %ext, <8 x i32> addrspace(1)* %out 1506 ret void 1507} 1508 1509; v8i16 is naturally 16 byte aligned 1510; TODO: 4 of these should use ASHR instead of LSHR + BFE_INT 1511; TODO: This should use DST, but for some reason there are redundant MOVs 1512define amdgpu_kernel void @constant_sextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 { 1513; GCN-NOHSA-SI-LABEL: constant_sextload_v8i16_to_v8i32: 1514; GCN-NOHSA-SI: ; %bb.0: 1515; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1516;
GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1517; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 1518; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 1519; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 1520; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1521; GCN-NOHSA-SI-NEXT: s_ashr_i32 s8, s5, 16 1522; GCN-NOHSA-SI-NEXT: s_ashr_i32 s9, s4, 16 1523; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s5, s5 1524; GCN-NOHSA-SI-NEXT: s_ashr_i32 s10, s7, 16 1525; GCN-NOHSA-SI-NEXT: s_ashr_i32 s11, s6, 16 1526; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s7, s7 1527; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s6, s6 1528; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s4, s4 1529; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 1530; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s11 1531; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7 1532; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s10 1533; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 1534; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 1535; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 1536; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s9 1537; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 1538; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s8 1539; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1540; GCN-NOHSA-SI-NEXT: s_endpgm 1541; 1542; GCN-HSA-LABEL: constant_sextload_v8i16_to_v8i32: 1543; GCN-HSA: ; %bb.0: 1544; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1545; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1546; GCN-HSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 1547; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1548; GCN-HSA-NEXT: s_ashr_i32 s8, s5, 16 1549; GCN-HSA-NEXT: s_ashr_i32 s9, s4, 16 1550; GCN-HSA-NEXT: s_ashr_i32 s2, s7, 16 1551; GCN-HSA-NEXT: s_ashr_i32 s3, s6, 16 1552; GCN-HSA-NEXT: v_mov_b32_e32 v3, s2 1553; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 1554; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1555; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 1556; GCN-HSA-NEXT: s_sext_i32_i16 s7, s7 1557; GCN-HSA-NEXT: s_sext_i32_i16 s6, s6 1558; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 1559; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 1560; 
GCN-HSA-NEXT: v_mov_b32_e32 v2, s7 1561; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 1562; GCN-HSA-NEXT: s_sext_i32_i16 s5, s5 1563; GCN-HSA-NEXT: s_sext_i32_i16 s4, s4 1564; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1565; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 1566; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 1567; GCN-HSA-NEXT: v_mov_b32_e32 v1, s9 1568; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5 1569; GCN-HSA-NEXT: v_mov_b32_e32 v3, s8 1570; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 1571; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1572; GCN-HSA-NEXT: s_endpgm 1573; 1574; GCN-NOHSA-VI-LABEL: constant_sextload_v8i16_to_v8i32: 1575; GCN-NOHSA-VI: ; %bb.0: 1576; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 1577; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1578; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 1579; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 1580; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 1581; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1582; GCN-NOHSA-VI-NEXT: s_ashr_i32 s10, s7, 16 1583; GCN-NOHSA-VI-NEXT: s_ashr_i32 s11, s6, 16 1584; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s7, s7 1585; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s6, s6 1586; GCN-NOHSA-VI-NEXT: s_ashr_i32 s8, s5, 16 1587; GCN-NOHSA-VI-NEXT: s_ashr_i32 s9, s4, 16 1588; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s5, s5 1589; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s4, s4 1590; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 1591; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s11 1592; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7 1593; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s10 1594; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 1595; GCN-NOHSA-VI-NEXT: s_nop 0 1596; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 1597; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s9 1598; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 1599; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s8 1600; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1601; GCN-NOHSA-VI-NEXT: s_endpgm 1602; 1603; EG-LABEL: constant_sextload_v8i16_to_v8i32: 1604; EG: ; %bb.0: 1605; EG-NEXT: ALU 0, 
@8, KC0[CB0:0-32], KC1[] 1606; EG-NEXT: TEX 0 @6 1607; EG-NEXT: ALU 19, @9, KC0[CB0:0-32], KC1[] 1608; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0 1609; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1 1610; EG-NEXT: CF_END 1611; EG-NEXT: Fetch clause starting at 6: 1612; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 1613; EG-NEXT: ALU clause starting at 8: 1614; EG-NEXT: MOV * T7.X, KC0[2].Z, 1615; EG-NEXT: ALU clause starting at 9: 1616; EG-NEXT: BFE_INT * T8.Z, T7.Y, 0.0, literal.x, 1617; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1618; EG-NEXT: BFE_INT T8.X, T7.X, 0.0, literal.x, 1619; EG-NEXT: BFE_INT T9.Z, T7.W, 0.0, literal.x, 1620; EG-NEXT: LSHR * T0.W, T7.Y, literal.x, 1621; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1622; EG-NEXT: BFE_INT T9.X, T7.Z, 0.0, literal.x, 1623; EG-NEXT: LSHR T0.Z, T7.W, literal.x, 1624; EG-NEXT: BFE_INT T8.W, PV.W, 0.0, literal.x, 1625; EG-NEXT: LSHR * T0.W, T7.X, literal.x, 1626; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1627; EG-NEXT: LSHR T7.X, KC0[2].Y, literal.x, 1628; EG-NEXT: BFE_INT T8.Y, PS, 0.0, literal.y, 1629; EG-NEXT: LSHR T1.Z, T7.Z, literal.y, 1630; EG-NEXT: BFE_INT T9.W, PV.Z, 0.0, literal.y, 1631; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 1632; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 1633; EG-NEXT: LSHR T10.X, PS, literal.x, 1634; EG-NEXT: BFE_INT * T9.Y, PV.Z, 0.0, literal.y, 1635; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 1636 %load = load <8 x i16>, <8 x i16> addrspace(4)* %in 1637 %ext = sext <8 x i16> %load to <8 x i32> 1638 store <8 x i32> %ext, <8 x i32> addrspace(1)* %out 1639 ret void 1640} 1641 ; Test kernel constant_zextload_v16i16_to_v16i32 - loads a <16 x i16> vector through the addrspace(4) pointer %in, zero-extends every lane to i32 and stores the <16 x i32> result through the addrspace(1) pointer %out. In the expected GCN output below each loaded dword is split into its halves with s_lshr_b32 by 16 and s_and_b32 with 0xffff, and the result is written with four dwordx4 stores; the expected R600 output uses LSHR and AND_INT and four MEM_RAT_CACHELESS STORE_RAW writes. The assertion lines of this test are autogenerated (see the notice on the first line of the file) - regenerate them with utils/update_llc_test_checks.py instead of editing them by hand. 1642define amdgpu_kernel void @constant_zextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 { 1643; GCN-NOHSA-SI-LABEL: constant_zextload_v16i16_to_v16i32: 1644; GCN-NOHSA-SI: ; %bb.0: 1645; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1646; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1647; GCN-NOHSA-SI-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 
1648; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 1649; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 1650; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1651; GCN-NOHSA-SI-NEXT: s_lshr_b32 s12, s5, 16 1652; GCN-NOHSA-SI-NEXT: s_lshr_b32 s13, s4, 16 1653; GCN-NOHSA-SI-NEXT: s_lshr_b32 s14, s7, 16 1654; GCN-NOHSA-SI-NEXT: s_lshr_b32 s15, s6, 16 1655; GCN-NOHSA-SI-NEXT: s_lshr_b32 s16, s9, 16 1656; GCN-NOHSA-SI-NEXT: s_lshr_b32 s17, s8, 16 1657; GCN-NOHSA-SI-NEXT: s_lshr_b32 s18, s11, 16 1658; GCN-NOHSA-SI-NEXT: s_lshr_b32 s19, s10, 16 1659; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, 0xffff 1660; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, 0xffff 1661; GCN-NOHSA-SI-NEXT: s_and_b32 s7, s7, 0xffff 1662; GCN-NOHSA-SI-NEXT: s_and_b32 s6, s6, 0xffff 1663; GCN-NOHSA-SI-NEXT: s_and_b32 s9, s9, 0xffff 1664; GCN-NOHSA-SI-NEXT: s_and_b32 s11, s11, 0xffff 1665; GCN-NOHSA-SI-NEXT: s_and_b32 s10, s10, 0xffff 1666; GCN-NOHSA-SI-NEXT: s_and_b32 s8, s8, 0xffff 1667; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10 1668; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s19 1669; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s11 1670; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s18 1671; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 1672; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 1673; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s8 1674; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s17 1675; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s9 1676; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s16 1677; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 1678; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 1679; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 1680; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s15 1681; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7 1682; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s14 1683; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 1684; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 1685; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 1686; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s13 1687; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 
1688; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s12 1689; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1690; GCN-NOHSA-SI-NEXT: s_endpgm 1691; 1692; GCN-HSA-LABEL: constant_zextload_v16i16_to_v16i32: 1693; GCN-HSA: ; %bb.0: 1694; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1695; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1696; GCN-HSA-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 1697; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1698; GCN-HSA-NEXT: s_lshr_b32 s12, s5, 16 1699; GCN-HSA-NEXT: s_lshr_b32 s13, s4, 16 1700; GCN-HSA-NEXT: s_lshr_b32 s14, s7, 16 1701; GCN-HSA-NEXT: s_lshr_b32 s15, s6, 16 1702; GCN-HSA-NEXT: s_lshr_b32 s16, s9, 16 1703; GCN-HSA-NEXT: s_lshr_b32 s17, s8, 16 1704; GCN-HSA-NEXT: s_lshr_b32 s2, s11, 16 1705; GCN-HSA-NEXT: s_lshr_b32 s3, s10, 16 1706; GCN-HSA-NEXT: s_and_b32 s5, s5, 0xffff 1707; GCN-HSA-NEXT: s_and_b32 s4, s4, 0xffff 1708; GCN-HSA-NEXT: s_and_b32 s7, s7, 0xffff 1709; GCN-HSA-NEXT: s_and_b32 s6, s6, 0xffff 1710; GCN-HSA-NEXT: s_and_b32 s9, s9, 0xffff 1711; GCN-HSA-NEXT: s_and_b32 s8, s8, 0xffff 1712; GCN-HSA-NEXT: s_and_b32 s11, s11, 0xffff 1713; GCN-HSA-NEXT: s_and_b32 s10, s10, 0xffff 1714; GCN-HSA-NEXT: v_mov_b32_e32 v3, s2 1715; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 1716; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1717; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 1718; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 1719; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 1720; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 1721; GCN-HSA-NEXT: v_mov_b32_e32 v0, s10 1722; GCN-HSA-NEXT: v_mov_b32_e32 v2, s11 1723; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 1724; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1725; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 1726; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 1727; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 1728; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 1729; GCN-HSA-NEXT: v_mov_b32_e32 v1, s17 1730; GCN-HSA-NEXT: v_mov_b32_e32 v2, s9 1731; GCN-HSA-NEXT: v_mov_b32_e32 v3, s16 1732; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 1733; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1734; 
GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 1735; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 1736; GCN-HSA-NEXT: v_mov_b32_e32 v1, s15 1737; GCN-HSA-NEXT: v_mov_b32_e32 v2, s7 1738; GCN-HSA-NEXT: v_mov_b32_e32 v3, s14 1739; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 1740; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1741; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 1742; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 1743; GCN-HSA-NEXT: v_mov_b32_e32 v1, s13 1744; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5 1745; GCN-HSA-NEXT: v_mov_b32_e32 v3, s12 1746; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 1747; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1748; GCN-HSA-NEXT: s_endpgm 1749; 1750; GCN-NOHSA-VI-LABEL: constant_zextload_v16i16_to_v16i32: 1751; GCN-NOHSA-VI: ; %bb.0: 1752; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 1753; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1754; GCN-NOHSA-VI-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 1755; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 1756; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 1757; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1758; GCN-NOHSA-VI-NEXT: s_lshr_b32 s18, s11, 16 1759; GCN-NOHSA-VI-NEXT: s_and_b32 s11, s11, 0xffff 1760; GCN-NOHSA-VI-NEXT: s_lshr_b32 s19, s10, 16 1761; GCN-NOHSA-VI-NEXT: s_and_b32 s10, s10, 0xffff 1762; GCN-NOHSA-VI-NEXT: s_lshr_b32 s16, s9, 16 1763; GCN-NOHSA-VI-NEXT: s_and_b32 s9, s9, 0xffff 1764; GCN-NOHSA-VI-NEXT: s_lshr_b32 s17, s8, 16 1765; GCN-NOHSA-VI-NEXT: s_and_b32 s8, s8, 0xffff 1766; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10 1767; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s19 1768; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s11 1769; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s18 1770; GCN-NOHSA-VI-NEXT: s_lshr_b32 s14, s7, 16 1771; GCN-NOHSA-VI-NEXT: s_and_b32 s7, s7, 0xffff 1772; GCN-NOHSA-VI-NEXT: s_lshr_b32 s15, s6, 16 1773; GCN-NOHSA-VI-NEXT: s_and_b32 s6, s6, 0xffff 1774; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 1775; GCN-NOHSA-VI-NEXT: s_lshr_b32 s12, s5, 16 1776; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8 1777; 
GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s17 1778; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s9 1779; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s16 1780; GCN-NOHSA-VI-NEXT: s_and_b32 s5, s5, 0xffff 1781; GCN-NOHSA-VI-NEXT: s_lshr_b32 s13, s4, 16 1782; GCN-NOHSA-VI-NEXT: s_and_b32 s4, s4, 0xffff 1783; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 1784; GCN-NOHSA-VI-NEXT: s_nop 0 1785; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 1786; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s15 1787; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7 1788; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s14 1789; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 1790; GCN-NOHSA-VI-NEXT: s_nop 0 1791; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 1792; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s13 1793; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 1794; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s12 1795; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1796; GCN-NOHSA-VI-NEXT: s_endpgm 1797; 1798; EG-LABEL: constant_zextload_v16i16_to_v16i32: 1799; EG: ; %bb.0: 1800; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 1801; EG-NEXT: TEX 1 @8 1802; EG-NEXT: ALU 35, @13, KC0[CB0:0-32], KC1[] 1803; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T18.X, 0 1804; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T11.X, 0 1805; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T16.X, 0 1806; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T12.X, 1 1807; EG-NEXT: CF_END 1808; EG-NEXT: Fetch clause starting at 8: 1809; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 0, #1 1810; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 16, #1 1811; EG-NEXT: ALU clause starting at 12: 1812; EG-NEXT: MOV * T11.X, KC0[2].Z, 1813; EG-NEXT: ALU clause starting at 13: 1814; EG-NEXT: LSHR * T13.W, T12.Y, literal.x, 1815; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1816; EG-NEXT: AND_INT * T13.Z, T12.Y, literal.x, 1817; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 1818; EG-NEXT: LSHR T13.Y, T12.X, literal.x, 1819; EG-NEXT: LSHR * T14.W, T12.W, 
literal.x, 1820; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1821; EG-NEXT: AND_INT T13.X, T12.X, literal.x, 1822; EG-NEXT: AND_INT T14.Z, T12.W, literal.x, 1823; EG-NEXT: LSHR * T12.X, KC0[2].Y, literal.y, 1824; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 1825; EG-NEXT: LSHR T14.Y, T12.Z, literal.x, 1826; EG-NEXT: LSHR * T15.W, T11.Y, literal.x, 1827; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1828; EG-NEXT: AND_INT T14.X, T12.Z, literal.x, 1829; EG-NEXT: AND_INT T15.Z, T11.Y, literal.x, 1830; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 1831; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 1832; EG-NEXT: LSHR T16.X, PV.W, literal.x, 1833; EG-NEXT: LSHR T15.Y, T11.X, literal.y, 1834; EG-NEXT: LSHR T17.W, T11.W, literal.y, 1835; EG-NEXT: AND_INT * T15.X, T11.X, literal.z, 1836; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 1837; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 1838; EG-NEXT: AND_INT T17.Z, T11.W, literal.x, 1839; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 1840; EG-NEXT: 65535(9.183409e-41), 32(4.484155e-44) 1841; EG-NEXT: LSHR T11.X, PV.W, literal.x, 1842; EG-NEXT: LSHR T17.Y, T11.Z, literal.y, 1843; EG-NEXT: AND_INT * T17.X, T11.Z, literal.z, 1844; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 1845; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 1846; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 1847; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 1848; EG-NEXT: LSHR * T18.X, PV.W, literal.x, 1849; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1850 %load = load <16 x i16>, <16 x i16> addrspace(4)* %in 1851 %ext = zext <16 x i16> %load to <16 x i32> 1852 store <16 x i32> %ext, <16 x i32> addrspace(1)* %out 1853 ret void 1854} 1855 ; Test kernel constant_sextload_v16i16_to_v16i32 - loads a <16 x i16> vector through the addrspace(4) pointer %in, sign-extends every lane to i32 and stores the <16 x i32> result through the addrspace(1) pointer %out. In the expected GCN output below the high half of each loaded dword is produced with s_ashr_i32 by 16 and the low half with s_sext_i32_i16, and the result is written with four dwordx4 stores; the expected R600 output uses BFE_INT (fed by LSHR for the high halves) and four MEM_RAT_CACHELESS STORE_RAW writes. The assertion lines of this test are autogenerated (see the notice on the first line of the file) - regenerate them with utils/update_llc_test_checks.py instead of editing them by hand. 1856define amdgpu_kernel void @constant_sextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 { 1857; GCN-NOHSA-SI-LABEL: constant_sextload_v16i16_to_v16i32: 1858; GCN-NOHSA-SI: ; %bb.0: 1859; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1860; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1861; 
GCN-NOHSA-SI-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 1862; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 1863; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 1864; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1865; GCN-NOHSA-SI-NEXT: s_ashr_i32 s12, s5, 16 1866; GCN-NOHSA-SI-NEXT: s_ashr_i32 s13, s4, 16 1867; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s5, s5 1868; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s4, s4 1869; GCN-NOHSA-SI-NEXT: s_ashr_i32 s14, s7, 16 1870; GCN-NOHSA-SI-NEXT: s_ashr_i32 s15, s6, 16 1871; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s7, s7 1872; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s6, s6 1873; GCN-NOHSA-SI-NEXT: s_ashr_i32 s16, s9, 16 1874; GCN-NOHSA-SI-NEXT: s_ashr_i32 s17, s8, 16 1875; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s9, s9 1876; GCN-NOHSA-SI-NEXT: s_ashr_i32 s18, s11, 16 1877; GCN-NOHSA-SI-NEXT: s_ashr_i32 s19, s10, 16 1878; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s11, s11 1879; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s10, s10 1880; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s8, s8 1881; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10 1882; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s19 1883; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s11 1884; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s18 1885; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 1886; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 1887; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s8 1888; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s17 1889; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s9 1890; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s16 1891; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 1892; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 1893; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 1894; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s15 1895; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7 1896; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s14 1897; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 1898; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 1899; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 1900; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s13 1901; 
GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 1902; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s12 1903; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1904; GCN-NOHSA-SI-NEXT: s_endpgm 1905; 1906; GCN-HSA-LABEL: constant_sextload_v16i16_to_v16i32: 1907; GCN-HSA: ; %bb.0: 1908; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1909; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1910; GCN-HSA-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 1911; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1912; GCN-HSA-NEXT: s_ashr_i32 s12, s5, 16 1913; GCN-HSA-NEXT: s_ashr_i32 s13, s4, 16 1914; GCN-HSA-NEXT: s_ashr_i32 s14, s7, 16 1915; GCN-HSA-NEXT: s_ashr_i32 s15, s6, 16 1916; GCN-HSA-NEXT: s_ashr_i32 s16, s9, 16 1917; GCN-HSA-NEXT: s_ashr_i32 s17, s8, 16 1918; GCN-HSA-NEXT: s_ashr_i32 s2, s11, 16 1919; GCN-HSA-NEXT: s_ashr_i32 s3, s10, 16 1920; GCN-HSA-NEXT: v_mov_b32_e32 v3, s2 1921; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 1922; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1923; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 1924; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 1925; GCN-HSA-NEXT: s_sext_i32_i16 s11, s11 1926; GCN-HSA-NEXT: s_sext_i32_i16 s10, s10 1927; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 1928; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 1929; GCN-HSA-NEXT: v_mov_b32_e32 v0, s10 1930; GCN-HSA-NEXT: v_mov_b32_e32 v2, s11 1931; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 1932; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1933; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 1934; GCN-HSA-NEXT: s_sext_i32_i16 s9, s9 1935; GCN-HSA-NEXT: s_sext_i32_i16 s8, s8 1936; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 1937; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 1938; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 1939; GCN-HSA-NEXT: v_mov_b32_e32 v1, s17 1940; GCN-HSA-NEXT: v_mov_b32_e32 v2, s9 1941; GCN-HSA-NEXT: v_mov_b32_e32 v3, s16 1942; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 1943; GCN-HSA-NEXT: s_sext_i32_i16 s7, s7 1944; GCN-HSA-NEXT: s_sext_i32_i16 s6, s6 1945; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1946; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 1947; GCN-HSA-NEXT: v_mov_b32_e32 v0, 
s6 1948; GCN-HSA-NEXT: v_mov_b32_e32 v1, s15 1949; GCN-HSA-NEXT: v_mov_b32_e32 v2, s7 1950; GCN-HSA-NEXT: v_mov_b32_e32 v3, s14 1951; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 1952; GCN-HSA-NEXT: s_sext_i32_i16 s5, s5 1953; GCN-HSA-NEXT: s_sext_i32_i16 s4, s4 1954; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1955; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 1956; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 1957; GCN-HSA-NEXT: v_mov_b32_e32 v1, s13 1958; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5 1959; GCN-HSA-NEXT: v_mov_b32_e32 v3, s12 1960; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 1961; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1962; GCN-HSA-NEXT: s_endpgm 1963; 1964; GCN-NOHSA-VI-LABEL: constant_sextload_v16i16_to_v16i32: 1965; GCN-NOHSA-VI: ; %bb.0: 1966; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 1967; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1968; GCN-NOHSA-VI-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 1969; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 1970; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 1971; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1972; GCN-NOHSA-VI-NEXT: s_ashr_i32 s18, s11, 16 1973; GCN-NOHSA-VI-NEXT: s_ashr_i32 s19, s10, 16 1974; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s11, s11 1975; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s10, s10 1976; GCN-NOHSA-VI-NEXT: s_ashr_i32 s16, s9, 16 1977; GCN-NOHSA-VI-NEXT: s_ashr_i32 s17, s8, 16 1978; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s9, s9 1979; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s8, s8 1980; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10 1981; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s19 1982; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s11 1983; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s18 1984; GCN-NOHSA-VI-NEXT: s_ashr_i32 s14, s7, 16 1985; GCN-NOHSA-VI-NEXT: s_ashr_i32 s15, s6, 16 1986; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s7, s7 1987; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s6, s6 1988; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 1989; GCN-NOHSA-VI-NEXT: s_ashr_i32 s12, s5, 16 1990; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8 1991; GCN-NOHSA-VI-NEXT: 
v_mov_b32_e32 v1, s17 1992; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s9 1993; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s16 1994; GCN-NOHSA-VI-NEXT: s_ashr_i32 s13, s4, 16 1995; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s5, s5 1996; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s4, s4 1997; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 1998; GCN-NOHSA-VI-NEXT: s_nop 0 1999; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 2000; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s15 2001; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7 2002; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s14 2003; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 2004; GCN-NOHSA-VI-NEXT: s_nop 0 2005; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 2006; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s13 2007; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 2008; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s12 2009; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 2010; GCN-NOHSA-VI-NEXT: s_endpgm 2011; 2012; EG-LABEL: constant_sextload_v16i16_to_v16i32: 2013; EG: ; %bb.0: 2014; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 2015; EG-NEXT: TEX 1 @8 2016; EG-NEXT: ALU 39, @13, KC0[CB0:0-32], KC1[] 2017; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T12.X, 0 2018; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T11.X, 0 2019; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T14.X, 0 2020; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T13.X, 1 2021; EG-NEXT: CF_END 2022; EG-NEXT: Fetch clause starting at 8: 2023; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1 2024; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1 2025; EG-NEXT: ALU clause starting at 12: 2026; EG-NEXT: MOV * T11.X, KC0[2].Z, 2027; EG-NEXT: ALU clause starting at 13: 2028; EG-NEXT: LSHR T13.X, KC0[2].Y, literal.x, 2029; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2030; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2031; EG-NEXT: LSHR T14.X, PV.W, literal.x, 2032; EG-NEXT: BFE_INT * T15.Z, T11.Y, 0.0, literal.y, 2033; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2034; 
EG-NEXT: BFE_INT T15.X, T11.X, 0.0, literal.x, 2035; EG-NEXT: LSHR T0.Y, T12.W, literal.x, 2036; EG-NEXT: BFE_INT T16.Z, T11.W, 0.0, literal.x, BS:VEC_120/SCL_212 2037; EG-NEXT: LSHR T0.W, T12.Y, literal.x, 2038; EG-NEXT: LSHR * T1.W, T11.Y, literal.x, 2039; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2040; EG-NEXT: BFE_INT T16.X, T11.Z, 0.0, literal.x, 2041; EG-NEXT: LSHR T1.Y, T11.W, literal.x, 2042; EG-NEXT: BFE_INT T17.Z, T12.Y, 0.0, literal.x, 2043; EG-NEXT: BFE_INT T15.W, PS, 0.0, literal.x, 2044; EG-NEXT: LSHR * T1.W, T11.X, literal.x, 2045; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2046; EG-NEXT: BFE_INT T17.X, T12.X, 0.0, literal.x, 2047; EG-NEXT: BFE_INT T15.Y, PS, 0.0, literal.x, 2048; EG-NEXT: BFE_INT T18.Z, T12.W, 0.0, literal.x, 2049; EG-NEXT: BFE_INT T16.W, PV.Y, 0.0, literal.x, 2050; EG-NEXT: LSHR * T1.W, T11.Z, literal.x, 2051; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2052; EG-NEXT: BFE_INT T18.X, T12.Z, 0.0, literal.x, 2053; EG-NEXT: BFE_INT T16.Y, PS, 0.0, literal.x, 2054; EG-NEXT: LSHR T0.Z, T12.X, literal.x, 2055; EG-NEXT: BFE_INT T17.W, T0.W, 0.0, literal.x, 2056; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2057; EG-NEXT: 16(2.242078e-44), 32(4.484155e-44) 2058; EG-NEXT: LSHR T11.X, PS, literal.x, 2059; EG-NEXT: BFE_INT T17.Y, PV.Z, 0.0, literal.y, 2060; EG-NEXT: LSHR T0.Z, T12.Z, literal.y, 2061; EG-NEXT: BFE_INT T18.W, T0.Y, 0.0, literal.y, 2062; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 2063; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2064; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 2065; EG-NEXT: LSHR T12.X, PS, literal.x, 2066; EG-NEXT: BFE_INT * T18.Y, PV.Z, 0.0, literal.y, 2067; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2068 %load = load <16 x i16>, <16 x i16> addrspace(4)* %in 2069 %ext = sext <16 x i16> %load to <16 x i32> 2070 store <16 x i32> %ext, <16 x i32> addrspace(1)* %out 2071 ret void 2072} 2073 2074define amdgpu_kernel void @constant_zextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(4)* 
%in) #0 { 2075; GCN-NOHSA-SI-LABEL: constant_zextload_v32i16_to_v32i32: 2076; GCN-NOHSA-SI: ; %bb.0: 2077; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[16:19], s[0:1], 0x9 2078; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 2079; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 2080; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 2081; GCN-NOHSA-SI-NEXT: s_lshr_b32 s18, s1, 16 2082; GCN-NOHSA-SI-NEXT: s_lshr_b32 s19, s0, 16 2083; GCN-NOHSA-SI-NEXT: s_lshr_b32 s20, s3, 16 2084; GCN-NOHSA-SI-NEXT: s_lshr_b32 s21, s2, 16 2085; GCN-NOHSA-SI-NEXT: s_lshr_b32 s22, s5, 16 2086; GCN-NOHSA-SI-NEXT: s_lshr_b32 s23, s4, 16 2087; GCN-NOHSA-SI-NEXT: s_lshr_b32 s24, s7, 16 2088; GCN-NOHSA-SI-NEXT: s_lshr_b32 s25, s6, 16 2089; GCN-NOHSA-SI-NEXT: s_lshr_b32 s26, s9, 16 2090; GCN-NOHSA-SI-NEXT: s_lshr_b32 s27, s8, 16 2091; GCN-NOHSA-SI-NEXT: s_lshr_b32 s28, s11, 16 2092; GCN-NOHSA-SI-NEXT: s_lshr_b32 s29, s10, 16 2093; GCN-NOHSA-SI-NEXT: s_lshr_b32 s30, s13, 16 2094; GCN-NOHSA-SI-NEXT: s_lshr_b32 s31, s12, 16 2095; GCN-NOHSA-SI-NEXT: s_lshr_b32 s33, s15, 16 2096; GCN-NOHSA-SI-NEXT: s_lshr_b32 s34, s14, 16 2097; GCN-NOHSA-SI-NEXT: s_and_b32 s35, s1, 0xffff 2098; GCN-NOHSA-SI-NEXT: s_and_b32 s36, s0, 0xffff 2099; GCN-NOHSA-SI-NEXT: s_and_b32 s37, s3, 0xffff 2100; GCN-NOHSA-SI-NEXT: s_and_b32 s38, s2, 0xffff 2101; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, 0xffff 2102; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, 0xffff 2103; GCN-NOHSA-SI-NEXT: s_and_b32 s7, s7, 0xffff 2104; GCN-NOHSA-SI-NEXT: s_and_b32 s6, s6, 0xffff 2105; GCN-NOHSA-SI-NEXT: s_and_b32 s9, s9, 0xffff 2106; GCN-NOHSA-SI-NEXT: s_and_b32 s8, s8, 0xffff 2107; GCN-NOHSA-SI-NEXT: s_and_b32 s11, s11, 0xffff 2108; GCN-NOHSA-SI-NEXT: s_and_b32 s10, s10, 0xffff 2109; GCN-NOHSA-SI-NEXT: s_and_b32 s13, s13, 0xffff 2110; GCN-NOHSA-SI-NEXT: s_and_b32 s12, s12, 0xffff 2111; GCN-NOHSA-SI-NEXT: s_and_b32 s15, s15, 0xffff 2112; GCN-NOHSA-SI-NEXT: s_and_b32 s14, s14, 0xffff 2113; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 2114; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 
2115; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s16 2116; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s17 2117; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s14 2118; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s34 2119; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s15 2120; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s33 2121; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 2122; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2123; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s12 2124; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s31 2125; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s13 2126; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s30 2127; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 2128; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2129; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10 2130; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s29 2131; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s11 2132; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s28 2133; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 2134; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2135; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s8 2136; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s27 2137; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s9 2138; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s26 2139; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 2140; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2141; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 2142; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s25 2143; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7 2144; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s24 2145; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 2146; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2147; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 2148; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s23 2149; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 2150; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s22 2151; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 2152; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2153; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s38 
2154; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s21 2155; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s37 2156; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s20 2157; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 2158; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2159; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s36 2160; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s19 2161; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s35 2162; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s18 2163; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 2164; GCN-NOHSA-SI-NEXT: s_endpgm 2165; 2166; GCN-HSA-LABEL: constant_zextload_v32i16_to_v32i32: 2167; GCN-HSA: ; %bb.0: 2168; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2169; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 2170; GCN-HSA-NEXT: s_load_dwordx16 s[4:19], s[2:3], 0x0 2171; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 2172; GCN-HSA-NEXT: s_lshr_b32 s20, s5, 16 2173; GCN-HSA-NEXT: s_lshr_b32 s21, s4, 16 2174; GCN-HSA-NEXT: s_lshr_b32 s22, s7, 16 2175; GCN-HSA-NEXT: s_lshr_b32 s23, s6, 16 2176; GCN-HSA-NEXT: s_lshr_b32 s24, s9, 16 2177; GCN-HSA-NEXT: s_lshr_b32 s25, s8, 16 2178; GCN-HSA-NEXT: s_lshr_b32 s26, s11, 16 2179; GCN-HSA-NEXT: s_lshr_b32 s27, s10, 16 2180; GCN-HSA-NEXT: s_lshr_b32 s28, s13, 16 2181; GCN-HSA-NEXT: s_lshr_b32 s29, s12, 16 2182; GCN-HSA-NEXT: s_lshr_b32 s30, s15, 16 2183; GCN-HSA-NEXT: s_lshr_b32 s31, s14, 16 2184; GCN-HSA-NEXT: s_lshr_b32 s33, s17, 16 2185; GCN-HSA-NEXT: s_lshr_b32 s34, s16, 16 2186; GCN-HSA-NEXT: s_lshr_b32 s35, s19, 16 2187; GCN-HSA-NEXT: s_lshr_b32 s36, s18, 16 2188; GCN-HSA-NEXT: s_and_b32 s5, s5, 0xffff 2189; GCN-HSA-NEXT: s_and_b32 s4, s4, 0xffff 2190; GCN-HSA-NEXT: s_and_b32 s7, s7, 0xffff 2191; GCN-HSA-NEXT: s_and_b32 s6, s6, 0xffff 2192; GCN-HSA-NEXT: s_and_b32 s9, s9, 0xffff 2193; GCN-HSA-NEXT: s_and_b32 s8, s8, 0xffff 2194; GCN-HSA-NEXT: s_and_b32 s11, s11, 0xffff 2195; GCN-HSA-NEXT: s_and_b32 s10, s10, 0xffff 2196; GCN-HSA-NEXT: s_and_b32 s13, s13, 0xffff 2197; GCN-HSA-NEXT: s_and_b32 s12, s12, 0xffff 2198; 
GCN-HSA-NEXT: s_and_b32 s15, s15, 0xffff 2199; GCN-HSA-NEXT: s_and_b32 s14, s14, 0xffff 2200; GCN-HSA-NEXT: s_and_b32 s17, s17, 0xffff 2201; GCN-HSA-NEXT: s_and_b32 s16, s16, 0xffff 2202; GCN-HSA-NEXT: s_and_b32 s19, s19, 0xffff 2203; GCN-HSA-NEXT: s_and_b32 s18, s18, 0xffff 2204; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70 2205; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2206; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 2207; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2 2208; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60 2209; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2210; GCN-HSA-NEXT: v_mov_b32_e32 v11, s3 2211; GCN-HSA-NEXT: v_mov_b32_e32 v10, s2 2212; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x50 2213; GCN-HSA-NEXT: v_mov_b32_e32 v0, s18 2214; GCN-HSA-NEXT: v_mov_b32_e32 v1, s36 2215; GCN-HSA-NEXT: v_mov_b32_e32 v2, s19 2216; GCN-HSA-NEXT: v_mov_b32_e32 v3, s35 2217; GCN-HSA-NEXT: v_mov_b32_e32 v4, s16 2218; GCN-HSA-NEXT: v_mov_b32_e32 v5, s34 2219; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2220; GCN-HSA-NEXT: v_mov_b32_e32 v6, s17 2221; GCN-HSA-NEXT: v_mov_b32_e32 v7, s33 2222; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3] 2223; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[4:7] 2224; GCN-HSA-NEXT: v_mov_b32_e32 v0, s14 2225; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 2226; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 2227; GCN-HSA-NEXT: s_add_u32 s2, s0, 64 2228; GCN-HSA-NEXT: v_mov_b32_e32 v1, s31 2229; GCN-HSA-NEXT: v_mov_b32_e32 v2, s15 2230; GCN-HSA-NEXT: v_mov_b32_e32 v3, s30 2231; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2232; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2233; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 2234; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 2235; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 2236; GCN-HSA-NEXT: v_mov_b32_e32 v0, s12 2237; GCN-HSA-NEXT: v_mov_b32_e32 v1, s29 2238; GCN-HSA-NEXT: v_mov_b32_e32 v2, s13 2239; GCN-HSA-NEXT: v_mov_b32_e32 v3, s28 2240; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2241; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2242; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 2243; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 2244; 
GCN-HSA-NEXT: s_add_u32 s2, s0, 32 2245; GCN-HSA-NEXT: v_mov_b32_e32 v0, s10 2246; GCN-HSA-NEXT: v_mov_b32_e32 v1, s27 2247; GCN-HSA-NEXT: v_mov_b32_e32 v2, s11 2248; GCN-HSA-NEXT: v_mov_b32_e32 v3, s26 2249; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2250; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2251; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 2252; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 2253; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 2254; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 2255; GCN-HSA-NEXT: v_mov_b32_e32 v1, s25 2256; GCN-HSA-NEXT: v_mov_b32_e32 v2, s9 2257; GCN-HSA-NEXT: v_mov_b32_e32 v3, s24 2258; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2259; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2260; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 2261; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 2262; GCN-HSA-NEXT: v_mov_b32_e32 v1, s23 2263; GCN-HSA-NEXT: v_mov_b32_e32 v2, s7 2264; GCN-HSA-NEXT: v_mov_b32_e32 v3, s22 2265; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 2266; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2267; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 2268; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 2269; GCN-HSA-NEXT: v_mov_b32_e32 v1, s21 2270; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5 2271; GCN-HSA-NEXT: v_mov_b32_e32 v3, s20 2272; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 2273; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2274; GCN-HSA-NEXT: s_endpgm 2275; 2276; GCN-NOHSA-VI-LABEL: constant_zextload_v32i16_to_v32i32: 2277; GCN-NOHSA-VI: ; %bb.0: 2278; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[16:19], s[0:1], 0x24 2279; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 2280; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 2281; GCN-NOHSA-VI-NEXT: s_mov_b32 s19, 0xf000 2282; GCN-NOHSA-VI-NEXT: s_mov_b32 s18, -1 2283; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 2284; GCN-NOHSA-VI-NEXT: s_lshr_b32 s35, s15, 16 2285; GCN-NOHSA-VI-NEXT: s_lshr_b32 s36, s14, 16 2286; GCN-NOHSA-VI-NEXT: s_and_b32 s15, s15, 0xffff 2287; GCN-NOHSA-VI-NEXT: s_and_b32 s14, s14, 0xffff 2288; GCN-NOHSA-VI-NEXT: s_lshr_b32 s33, s13, 16 2289; GCN-NOHSA-VI-NEXT: 
s_lshr_b32 s34, s12, 16 2290; GCN-NOHSA-VI-NEXT: s_and_b32 s13, s13, 0xffff 2291; GCN-NOHSA-VI-NEXT: s_and_b32 s12, s12, 0xffff 2292; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s14 2293; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s36 2294; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s15 2295; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s35 2296; GCN-NOHSA-VI-NEXT: s_lshr_b32 s30, s11, 16 2297; GCN-NOHSA-VI-NEXT: s_lshr_b32 s31, s10, 16 2298; GCN-NOHSA-VI-NEXT: s_and_b32 s11, s11, 0xffff 2299; GCN-NOHSA-VI-NEXT: s_and_b32 s10, s10, 0xffff 2300; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:112 2301; GCN-NOHSA-VI-NEXT: s_lshr_b32 s28, s9, 16 2302; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s12 2303; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s34 2304; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s13 2305; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s33 2306; GCN-NOHSA-VI-NEXT: s_lshr_b32 s29, s8, 16 2307; GCN-NOHSA-VI-NEXT: s_and_b32 s9, s9, 0xffff 2308; GCN-NOHSA-VI-NEXT: s_and_b32 s8, s8, 0xffff 2309; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:96 2310; GCN-NOHSA-VI-NEXT: s_lshr_b32 s26, s7, 16 2311; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10 2312; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s31 2313; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s11 2314; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s30 2315; GCN-NOHSA-VI-NEXT: s_lshr_b32 s27, s6, 16 2316; GCN-NOHSA-VI-NEXT: s_and_b32 s7, s7, 0xffff 2317; GCN-NOHSA-VI-NEXT: s_and_b32 s6, s6, 0xffff 2318; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:80 2319; GCN-NOHSA-VI-NEXT: s_lshr_b32 s24, s5, 16 2320; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8 2321; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s29 2322; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s9 2323; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s28 2324; GCN-NOHSA-VI-NEXT: s_lshr_b32 s25, s4, 16 2325; GCN-NOHSA-VI-NEXT: s_and_b32 s5, s5, 0xffff 2326; GCN-NOHSA-VI-NEXT: s_and_b32 s4, s4, 0xffff 2327; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:64 2328; 
GCN-NOHSA-VI-NEXT: s_lshr_b32 s22, s3, 16 2329; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 2330; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s27 2331; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7 2332; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s26 2333; GCN-NOHSA-VI-NEXT: s_lshr_b32 s23, s2, 16 2334; GCN-NOHSA-VI-NEXT: s_and_b32 s3, s3, 0xffff 2335; GCN-NOHSA-VI-NEXT: s_and_b32 s2, s2, 0xffff 2336; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:48 2337; GCN-NOHSA-VI-NEXT: s_lshr_b32 s20, s1, 16 2338; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 2339; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s25 2340; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 2341; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s24 2342; GCN-NOHSA-VI-NEXT: s_lshr_b32 s21, s0, 16 2343; GCN-NOHSA-VI-NEXT: s_and_b32 s1, s1, 0xffff 2344; GCN-NOHSA-VI-NEXT: s_and_b32 s0, s0, 0xffff 2345; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:32 2346; GCN-NOHSA-VI-NEXT: s_nop 0 2347; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s2 2348; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s23 2349; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s3 2350; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s22 2351; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:16 2352; GCN-NOHSA-VI-NEXT: s_nop 0 2353; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s0 2354; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s21 2355; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s1 2356; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s20 2357; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 2358; GCN-NOHSA-VI-NEXT: s_endpgm 2359; 2360; EG-LABEL: constant_zextload_v32i16_to_v32i32: 2361; EG: ; %bb.0: 2362; EG-NEXT: ALU 0, @20, KC0[CB0:0-32], KC1[] 2363; EG-NEXT: TEX 3 @12 2364; EG-NEXT: ALU 71, @21, KC0[CB0:0-32], KC1[] 2365; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T34.X, 0 2366; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T21.X, 0 2367; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T32.X, 0 2368; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T22.X, 0 2369; 
EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T29.X, 0 2370; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T19.X, 0 2371; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T26.X, 0 2372; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T20.X, 1 2373; EG-NEXT: CF_END 2374; EG-NEXT: Fetch clause starting at 12: 2375; EG-NEXT: VTX_READ_128 T20.XYZW, T19.X, 0, #1 2376; EG-NEXT: VTX_READ_128 T21.XYZW, T19.X, 48, #1 2377; EG-NEXT: VTX_READ_128 T22.XYZW, T19.X, 32, #1 2378; EG-NEXT: VTX_READ_128 T19.XYZW, T19.X, 16, #1 2379; EG-NEXT: ALU clause starting at 20: 2380; EG-NEXT: MOV * T19.X, KC0[2].Z, 2381; EG-NEXT: ALU clause starting at 21: 2382; EG-NEXT: LSHR * T23.W, T20.Y, literal.x, 2383; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2384; EG-NEXT: AND_INT * T23.Z, T20.Y, literal.x, 2385; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2386; EG-NEXT: LSHR T23.Y, T20.X, literal.x, 2387; EG-NEXT: LSHR * T24.W, T20.W, literal.x, 2388; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2389; EG-NEXT: AND_INT T23.X, T20.X, literal.x, 2390; EG-NEXT: AND_INT T24.Z, T20.W, literal.x, 2391; EG-NEXT: LSHR * T20.X, KC0[2].Y, literal.y, 2392; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 2393; EG-NEXT: LSHR T24.Y, T20.Z, literal.x, 2394; EG-NEXT: LSHR * T25.W, T19.Y, literal.x, 2395; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2396; EG-NEXT: AND_INT T24.X, T20.Z, literal.x, 2397; EG-NEXT: AND_INT T25.Z, T19.Y, literal.x, 2398; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2399; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 2400; EG-NEXT: LSHR T26.X, PV.W, literal.x, 2401; EG-NEXT: LSHR T25.Y, T19.X, literal.y, 2402; EG-NEXT: LSHR T27.W, T19.W, literal.y, 2403; EG-NEXT: AND_INT * T25.X, T19.X, literal.z, 2404; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2405; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2406; EG-NEXT: AND_INT T27.Z, T19.W, literal.x, 2407; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2408; EG-NEXT: 65535(9.183409e-41), 32(4.484155e-44) 2409; EG-NEXT: LSHR T19.X, PV.W, literal.x, 
2410; EG-NEXT: LSHR T27.Y, T19.Z, literal.y, 2411; EG-NEXT: LSHR T28.W, T22.Y, literal.y, 2412; EG-NEXT: AND_INT * T27.X, T19.Z, literal.z, 2413; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2414; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2415; EG-NEXT: AND_INT T28.Z, T22.Y, literal.x, 2416; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2417; EG-NEXT: 65535(9.183409e-41), 48(6.726233e-44) 2418; EG-NEXT: LSHR T29.X, PV.W, literal.x, 2419; EG-NEXT: LSHR T28.Y, T22.X, literal.y, 2420; EG-NEXT: LSHR T30.W, T22.W, literal.y, 2421; EG-NEXT: AND_INT * T28.X, T22.X, literal.z, 2422; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2423; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2424; EG-NEXT: AND_INT T30.Z, T22.W, literal.x, 2425; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2426; EG-NEXT: 65535(9.183409e-41), 64(8.968310e-44) 2427; EG-NEXT: LSHR T22.X, PV.W, literal.x, 2428; EG-NEXT: LSHR T30.Y, T22.Z, literal.y, 2429; EG-NEXT: LSHR T31.W, T21.Y, literal.y, 2430; EG-NEXT: AND_INT * T30.X, T22.Z, literal.z, 2431; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2432; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2433; EG-NEXT: AND_INT T31.Z, T21.Y, literal.x, 2434; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2435; EG-NEXT: 65535(9.183409e-41), 80(1.121039e-43) 2436; EG-NEXT: LSHR T32.X, PV.W, literal.x, 2437; EG-NEXT: LSHR T31.Y, T21.X, literal.y, 2438; EG-NEXT: LSHR T33.W, T21.W, literal.y, 2439; EG-NEXT: AND_INT * T31.X, T21.X, literal.z, 2440; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2441; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2442; EG-NEXT: AND_INT T33.Z, T21.W, literal.x, 2443; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2444; EG-NEXT: 65535(9.183409e-41), 96(1.345247e-43) 2445; EG-NEXT: LSHR T21.X, PV.W, literal.x, 2446; EG-NEXT: LSHR T33.Y, T21.Z, literal.y, 2447; EG-NEXT: AND_INT * T33.X, T21.Z, literal.z, 2448; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2449; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2450; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 
2451; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00) 2452; EG-NEXT: LSHR * T34.X, PV.W, literal.x, 2453; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2454 %load = load <32 x i16>, <32 x i16> addrspace(4)* %in 2455 %ext = zext <32 x i16> %load to <32 x i32> 2456 store <32 x i32> %ext, <32 x i32> addrspace(1)* %out 2457 ret void 2458} 2459
; Test kernel: loads a <32 x i16> vector through the addrspace(4) pointer %in,
; sign-extends every element to i32, and stores the resulting <32 x i32>
; vector through the addrspace(1) pointer %out.
;
; The expected-output lines that follow are autogenerated (see the note at the
; top of this file naming utils/update_llc_test_checks.py); regenerate them
; with that script rather than editing them by hand. As recorded there, the
; three GCN run lines fetch the whole input with one s_load_dwordx16 and widen
; each dword with s_ashr_i32 (upper half) plus s_sext_i32_i16 (lower half),
; while the r600 run line uses four VTX_READ_128 fetches followed by LSHR and
; BFE_INT. Every run line expects eight 128-bit stores.
2460define amdgpu_kernel void @constant_sextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 { 2461; GCN-NOHSA-SI-LABEL: constant_sextload_v32i16_to_v32i32: 2462; GCN-NOHSA-SI: ; %bb.0: 2463; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[16:19], s[0:1], 0x9 2464; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 2465; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 2466; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 2467; GCN-NOHSA-SI-NEXT: s_ashr_i32 s18, s1, 16 2468; GCN-NOHSA-SI-NEXT: s_ashr_i32 s19, s0, 16 2469; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s20, s1 2470; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s21, s0 2471; GCN-NOHSA-SI-NEXT: s_ashr_i32 s22, s3, 16 2472; GCN-NOHSA-SI-NEXT: s_ashr_i32 s23, s2, 16 2473; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s24, s3 2474; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s25, s2 2475; GCN-NOHSA-SI-NEXT: s_ashr_i32 s26, s5, 16 2476; GCN-NOHSA-SI-NEXT: s_ashr_i32 s27, s4, 16 2477; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s5, s5 2478; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s4, s4 2479; GCN-NOHSA-SI-NEXT: s_ashr_i32 s28, s7, 16 2480; GCN-NOHSA-SI-NEXT: s_ashr_i32 s29, s6, 16 2481; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s7, s7 2482; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s6, s6 2483; GCN-NOHSA-SI-NEXT: s_ashr_i32 s30, s9, 16 2484; GCN-NOHSA-SI-NEXT: s_ashr_i32 s31, s8, 16 2485; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s9, s9 2486; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s8, s8 2487; GCN-NOHSA-SI-NEXT: s_ashr_i32 s33, s11, 16 2488; GCN-NOHSA-SI-NEXT: s_ashr_i32 s34, s10, 16 2489; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s11, s11 2490; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s10, s10 2491; GCN-NOHSA-SI-NEXT: s_ashr_i32 s35, s13, 16 2492; GCN-NOHSA-SI-NEXT: 
s_ashr_i32 s36, s12, 16 2493; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s13, s13 2494; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s12, s12 2495; GCN-NOHSA-SI-NEXT: s_ashr_i32 s37, s15, 16 2496; GCN-NOHSA-SI-NEXT: s_ashr_i32 s38, s14, 16 2497; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s15, s15 2498; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s14, s14 2499; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 2500; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 2501; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s16 2502; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s17 2503; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s14 2504; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s38 2505; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s15 2506; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s37 2507; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 2508; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2509; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s12 2510; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s36 2511; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s13 2512; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s35 2513; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 2514; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2515; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10 2516; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s34 2517; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s11 2518; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s33 2519; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 2520; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2521; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s8 2522; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s31 2523; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s9 2524; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s30 2525; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 2526; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2527; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 2528; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s29 2529; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7 2530; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s28 2531; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 
offset:48 2532; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2533; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 2534; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s27 2535; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 2536; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s26 2537; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 2538; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2539; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s25 2540; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s23 2541; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s24 2542; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s22 2543; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 2544; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2545; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s21 2546; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s19 2547; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s20 2548; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s18 2549; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 2550; GCN-NOHSA-SI-NEXT: s_endpgm 2551; 2552; GCN-HSA-LABEL: constant_sextload_v32i16_to_v32i32: 2553; GCN-HSA: ; %bb.0: 2554; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2555; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 2556; GCN-HSA-NEXT: s_load_dwordx16 s[4:19], s[2:3], 0x0 2557; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 2558; GCN-HSA-NEXT: s_ashr_i32 s20, s5, 16 2559; GCN-HSA-NEXT: s_ashr_i32 s21, s4, 16 2560; GCN-HSA-NEXT: s_ashr_i32 s22, s7, 16 2561; GCN-HSA-NEXT: s_ashr_i32 s23, s6, 16 2562; GCN-HSA-NEXT: s_ashr_i32 s24, s9, 16 2563; GCN-HSA-NEXT: s_ashr_i32 s25, s8, 16 2564; GCN-HSA-NEXT: s_ashr_i32 s26, s11, 16 2565; GCN-HSA-NEXT: s_ashr_i32 s27, s10, 16 2566; GCN-HSA-NEXT: s_ashr_i32 s28, s13, 16 2567; GCN-HSA-NEXT: s_ashr_i32 s29, s12, 16 2568; GCN-HSA-NEXT: s_ashr_i32 s30, s15, 16 2569; GCN-HSA-NEXT: s_ashr_i32 s31, s14, 16 2570; GCN-HSA-NEXT: s_ashr_i32 s33, s17, 16 2571; GCN-HSA-NEXT: s_ashr_i32 s34, s16, 16 2572; GCN-HSA-NEXT: s_ashr_i32 s35, s19, 16 2573; GCN-HSA-NEXT: s_ashr_i32 s36, s18, 16 2574; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70 2575; 
GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2576; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 2577; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2 2578; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60 2579; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2580; GCN-HSA-NEXT: v_mov_b32_e32 v11, s3 2581; GCN-HSA-NEXT: s_sext_i32_i16 s16, s16 2582; GCN-HSA-NEXT: s_sext_i32_i16 s19, s19 2583; GCN-HSA-NEXT: s_sext_i32_i16 s18, s18 2584; GCN-HSA-NEXT: v_mov_b32_e32 v10, s2 2585; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x50 2586; GCN-HSA-NEXT: s_sext_i32_i16 s17, s17 2587; GCN-HSA-NEXT: v_mov_b32_e32 v0, s18 2588; GCN-HSA-NEXT: v_mov_b32_e32 v1, s36 2589; GCN-HSA-NEXT: v_mov_b32_e32 v2, s19 2590; GCN-HSA-NEXT: v_mov_b32_e32 v3, s35 2591; GCN-HSA-NEXT: v_mov_b32_e32 v4, s16 2592; GCN-HSA-NEXT: v_mov_b32_e32 v5, s34 2593; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2594; GCN-HSA-NEXT: v_mov_b32_e32 v6, s17 2595; GCN-HSA-NEXT: v_mov_b32_e32 v7, s33 2596; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3] 2597; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[4:7] 2598; GCN-HSA-NEXT: s_sext_i32_i16 s15, s15 2599; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 2600; GCN-HSA-NEXT: s_sext_i32_i16 s14, s14 2601; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 2602; GCN-HSA-NEXT: s_add_u32 s2, s0, 64 2603; GCN-HSA-NEXT: v_mov_b32_e32 v0, s14 2604; GCN-HSA-NEXT: v_mov_b32_e32 v1, s31 2605; GCN-HSA-NEXT: v_mov_b32_e32 v2, s15 2606; GCN-HSA-NEXT: v_mov_b32_e32 v3, s30 2607; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2608; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2609; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 2610; GCN-HSA-NEXT: s_sext_i32_i16 s13, s13 2611; GCN-HSA-NEXT: s_sext_i32_i16 s12, s12 2612; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 2613; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 2614; GCN-HSA-NEXT: v_mov_b32_e32 v0, s12 2615; GCN-HSA-NEXT: v_mov_b32_e32 v1, s29 2616; GCN-HSA-NEXT: v_mov_b32_e32 v2, s13 2617; GCN-HSA-NEXT: v_mov_b32_e32 v3, s28 2618; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2619; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2620; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 2621; GCN-HSA-NEXT: 
s_sext_i32_i16 s11, s11 2622; GCN-HSA-NEXT: s_sext_i32_i16 s10, s10 2623; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 2624; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 2625; GCN-HSA-NEXT: v_mov_b32_e32 v0, s10 2626; GCN-HSA-NEXT: v_mov_b32_e32 v1, s27 2627; GCN-HSA-NEXT: v_mov_b32_e32 v2, s11 2628; GCN-HSA-NEXT: v_mov_b32_e32 v3, s26 2629; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2630; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2631; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 2632; GCN-HSA-NEXT: s_sext_i32_i16 s9, s9 2633; GCN-HSA-NEXT: s_sext_i32_i16 s8, s8 2634; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 2635; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 2636; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 2637; GCN-HSA-NEXT: v_mov_b32_e32 v1, s25 2638; GCN-HSA-NEXT: v_mov_b32_e32 v2, s9 2639; GCN-HSA-NEXT: v_mov_b32_e32 v3, s24 2640; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2641; GCN-HSA-NEXT: s_sext_i32_i16 s7, s7 2642; GCN-HSA-NEXT: s_sext_i32_i16 s6, s6 2643; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2644; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 2645; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 2646; GCN-HSA-NEXT: v_mov_b32_e32 v1, s23 2647; GCN-HSA-NEXT: v_mov_b32_e32 v2, s7 2648; GCN-HSA-NEXT: v_mov_b32_e32 v3, s22 2649; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 2650; GCN-HSA-NEXT: s_sext_i32_i16 s5, s5 2651; GCN-HSA-NEXT: s_sext_i32_i16 s4, s4 2652; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2653; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 2654; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 2655; GCN-HSA-NEXT: v_mov_b32_e32 v1, s21 2656; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5 2657; GCN-HSA-NEXT: v_mov_b32_e32 v3, s20 2658; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 2659; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2660; GCN-HSA-NEXT: s_endpgm 2661; 2662; GCN-NOHSA-VI-LABEL: constant_sextload_v32i16_to_v32i32: 2663; GCN-NOHSA-VI: ; %bb.0: 2664; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[16:19], s[0:1], 0x24 2665; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 2666; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 2667; GCN-NOHSA-VI-NEXT: s_mov_b32 s19, 0xf000 
2668; GCN-NOHSA-VI-NEXT: s_mov_b32 s18, -1 2669; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 2670; GCN-NOHSA-VI-NEXT: s_ashr_i32 s35, s15, 16 2671; GCN-NOHSA-VI-NEXT: s_ashr_i32 s36, s14, 16 2672; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s15, s15 2673; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s14, s14 2674; GCN-NOHSA-VI-NEXT: s_ashr_i32 s33, s13, 16 2675; GCN-NOHSA-VI-NEXT: s_ashr_i32 s34, s12, 16 2676; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s13, s13 2677; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s12, s12 2678; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s14 2679; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s36 2680; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s15 2681; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s35 2682; GCN-NOHSA-VI-NEXT: s_ashr_i32 s30, s11, 16 2683; GCN-NOHSA-VI-NEXT: s_ashr_i32 s31, s10, 16 2684; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s11, s11 2685; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s10, s10 2686; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:112 2687; GCN-NOHSA-VI-NEXT: s_ashr_i32 s28, s9, 16 2688; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s12 2689; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s34 2690; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s13 2691; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s33 2692; GCN-NOHSA-VI-NEXT: s_ashr_i32 s29, s8, 16 2693; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s9, s9 2694; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s8, s8 2695; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:96 2696; GCN-NOHSA-VI-NEXT: s_ashr_i32 s26, s7, 16 2697; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10 2698; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s31 2699; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s11 2700; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s30 2701; GCN-NOHSA-VI-NEXT: s_ashr_i32 s27, s6, 16 2702; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s7, s7 2703; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s6, s6 2704; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:80 2705; GCN-NOHSA-VI-NEXT: s_ashr_i32 s24, s5, 16 2706; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8 2707; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 
v1, s29 2708; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s9 2709; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s28 2710; GCN-NOHSA-VI-NEXT: s_ashr_i32 s25, s4, 16 2711; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s5, s5 2712; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s4, s4 2713; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:64 2714; GCN-NOHSA-VI-NEXT: s_ashr_i32 s22, s3, 16 2715; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 2716; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s27 2717; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7 2718; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s26 2719; GCN-NOHSA-VI-NEXT: s_ashr_i32 s23, s2, 16 2720; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s3, s3 2721; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s2, s2 2722; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:48 2723; GCN-NOHSA-VI-NEXT: s_ashr_i32 s20, s1, 16 2724; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 2725; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s25 2726; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 2727; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s24 2728; GCN-NOHSA-VI-NEXT: s_ashr_i32 s21, s0, 16 2729; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s1, s1 2730; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s0, s0 2731; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:32 2732; GCN-NOHSA-VI-NEXT: s_nop 0 2733; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s2 2734; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s23 2735; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s3 2736; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s22 2737; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:16 2738; GCN-NOHSA-VI-NEXT: s_nop 0 2739; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s0 2740; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s21 2741; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s1 2742; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s20 2743; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 2744; GCN-NOHSA-VI-NEXT: s_endpgm 2745; 2746; EG-LABEL: constant_sextload_v32i16_to_v32i32: 2747; EG: ; %bb.0: 2748; EG-NEXT: ALU 8, @20, KC0[CB0:0-32], KC1[] 2749; 
EG-NEXT: TEX 3 @12 2750; EG-NEXT: ALU 73, @29, KC0[CB0:0-32], KC1[] 2751; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T34.XYZW, T24.X, 0 2752; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T22.X, 0 2753; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T28.X, 0 2754; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T27.X, 0 2755; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T26.X, 0 2756; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T21.X, 0 2757; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T20.X, 0 2758; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T19.X, 1 2759; EG-NEXT: CF_END 2760; EG-NEXT: Fetch clause starting at 12: 2761; EG-NEXT: VTX_READ_128 T23.XYZW, T22.X, 16, #1 2762; EG-NEXT: VTX_READ_128 T24.XYZW, T22.X, 32, #1 2763; EG-NEXT: VTX_READ_128 T25.XYZW, T22.X, 0, #1 2764; EG-NEXT: VTX_READ_128 T22.XYZW, T22.X, 48, #1 2765; EG-NEXT: ALU clause starting at 20: 2766; EG-NEXT: LSHR T19.X, KC0[2].Y, literal.x, 2767; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2768; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2769; EG-NEXT: LSHR T20.X, PV.W, literal.x, 2770; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2771; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 2772; EG-NEXT: LSHR T21.X, PV.W, literal.x, 2773; EG-NEXT: MOV * T22.X, KC0[2].Z, 2774; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2775; EG-NEXT: ALU clause starting at 29: 2776; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 2777; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 2778; EG-NEXT: LSHR T26.X, PV.W, literal.x, 2779; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2780; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 2781; EG-NEXT: LSHR T27.X, PV.W, literal.x, 2782; EG-NEXT: LSHR T0.W, T22.W, literal.y, 2783; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 2784; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2785; EG-NEXT: 80(1.121039e-43), 0(0.000000e+00) 2786; EG-NEXT: LSHR T28.X, PS, literal.x, 2787; EG-NEXT: LSHR T0.Y, T22.Y, literal.y, 2788; EG-NEXT: BFE_INT T29.Z, T25.Y, 0.0, literal.y, BS:VEC_120/SCL_212 2789; 
EG-NEXT: LSHR T1.W, T24.W, literal.y, 2790; EG-NEXT: LSHR * T2.W, T24.Y, literal.y, 2791; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2792; EG-NEXT: BFE_INT T29.X, T25.X, 0.0, literal.x, 2793; EG-NEXT: LSHR T1.Y, T23.W, literal.x, 2794; EG-NEXT: BFE_INT T30.Z, T25.W, 0.0, literal.x, BS:VEC_120/SCL_212 2795; EG-NEXT: LSHR T3.W, T23.Y, literal.x, 2796; EG-NEXT: LSHR * T4.W, T25.Y, literal.x, 2797; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2798; EG-NEXT: BFE_INT T30.X, T25.Z, 0.0, literal.x, 2799; EG-NEXT: LSHR T2.Y, T25.W, literal.x, 2800; EG-NEXT: BFE_INT T31.Z, T23.Y, 0.0, literal.x, 2801; EG-NEXT: BFE_INT T29.W, PS, 0.0, literal.x, 2802; EG-NEXT: LSHR * T4.W, T25.X, literal.x, 2803; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2804; EG-NEXT: BFE_INT T31.X, T23.X, 0.0, literal.x, 2805; EG-NEXT: BFE_INT T29.Y, PS, 0.0, literal.x, 2806; EG-NEXT: BFE_INT T32.Z, T23.W, 0.0, literal.x, 2807; EG-NEXT: BFE_INT T30.W, PV.Y, 0.0, literal.x, 2808; EG-NEXT: LSHR * T4.W, T25.Z, literal.x, 2809; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2810; EG-NEXT: BFE_INT T32.X, T23.Z, 0.0, literal.x, 2811; EG-NEXT: BFE_INT T30.Y, PS, 0.0, literal.x, 2812; EG-NEXT: BFE_INT T25.Z, T24.Y, 0.0, literal.x, 2813; EG-NEXT: BFE_INT T31.W, T3.W, 0.0, literal.x, 2814; EG-NEXT: LSHR * T3.W, T23.X, literal.x, 2815; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2816; EG-NEXT: BFE_INT T25.X, T24.X, 0.0, literal.x, 2817; EG-NEXT: BFE_INT T31.Y, PS, 0.0, literal.x, 2818; EG-NEXT: BFE_INT T33.Z, T24.W, 0.0, literal.x, 2819; EG-NEXT: BFE_INT T32.W, T1.Y, 0.0, literal.x, 2820; EG-NEXT: LSHR * T3.W, T23.Z, literal.x, 2821; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2822; EG-NEXT: BFE_INT T33.X, T24.Z, 0.0, literal.x, 2823; EG-NEXT: BFE_INT T32.Y, PS, 0.0, literal.x, 2824; EG-NEXT: BFE_INT T23.Z, T22.Y, 0.0, literal.x, 2825; EG-NEXT: BFE_INT T25.W, T2.W, 0.0, literal.x, 2826; EG-NEXT: LSHR * T2.W, T24.X, literal.x, 2827; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2828; EG-NEXT: BFE_INT T23.X, T22.X, 0.0, 
literal.x, 2829; EG-NEXT: BFE_INT T25.Y, PS, 0.0, literal.x, 2830; EG-NEXT: BFE_INT T34.Z, T22.W, 0.0, literal.x, 2831; EG-NEXT: BFE_INT T33.W, T1.W, 0.0, literal.x, BS:VEC_120/SCL_212 2832; EG-NEXT: LSHR * T1.W, T24.Z, literal.x, 2833; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2834; EG-NEXT: BFE_INT T34.X, T22.Z, 0.0, literal.x, 2835; EG-NEXT: BFE_INT T33.Y, PS, 0.0, literal.x, 2836; EG-NEXT: LSHR T0.Z, T22.X, literal.x, 2837; EG-NEXT: BFE_INT T23.W, T0.Y, 0.0, literal.x, 2838; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 2839; EG-NEXT: 16(2.242078e-44), 96(1.345247e-43) 2840; EG-NEXT: LSHR T22.X, PS, literal.x, 2841; EG-NEXT: BFE_INT T23.Y, PV.Z, 0.0, literal.y, 2842; EG-NEXT: LSHR T0.Z, T22.Z, literal.y, 2843; EG-NEXT: BFE_INT T34.W, T0.W, 0.0, literal.y, 2844; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 2845; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2846; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00) 2847; EG-NEXT: LSHR T24.X, PS, literal.x, 2848; EG-NEXT: BFE_INT * T34.Y, PV.Z, 0.0, literal.y, 2849; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2850 %load = load <32 x i16>, <32 x i16> addrspace(4)* %in 2851 %ext = sext <32 x i16> %load to <32 x i32> 2852 store <32 x i32> %ext, <32 x i32> addrspace(1)* %out 2853 ret void 2854} 2855 2856define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 { 2857; GCN-NOHSA-SI-LABEL: constant_zextload_v64i16_to_v64i32: 2858; GCN-NOHSA-SI: ; %bb.0: 2859; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[16:19], s[0:1], 0x9 2860; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 2861; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 2862; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[36:51], s[18:19], 0x10 2863; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 2864; GCN-NOHSA-SI-NEXT: s_lshr_b32 s18, s1, 16 2865; GCN-NOHSA-SI-NEXT: s_lshr_b32 s19, s0, 16 2866; GCN-NOHSA-SI-NEXT: s_lshr_b32 s20, s3, 16 2867; GCN-NOHSA-SI-NEXT: s_lshr_b32 s21, s2, 16 2868; GCN-NOHSA-SI-NEXT: s_lshr_b32 s22, 
s5, 16 2869; GCN-NOHSA-SI-NEXT: s_lshr_b32 s23, s4, 16 2870; GCN-NOHSA-SI-NEXT: s_lshr_b32 s24, s7, 16 2871; GCN-NOHSA-SI-NEXT: s_lshr_b32 s25, s6, 16 2872; GCN-NOHSA-SI-NEXT: s_lshr_b32 s26, s9, 16 2873; GCN-NOHSA-SI-NEXT: s_lshr_b32 s27, s8, 16 2874; GCN-NOHSA-SI-NEXT: s_lshr_b32 s28, s11, 16 2875; GCN-NOHSA-SI-NEXT: s_lshr_b32 s29, s10, 16 2876; GCN-NOHSA-SI-NEXT: s_lshr_b32 s30, s13, 16 2877; GCN-NOHSA-SI-NEXT: s_lshr_b32 s31, s12, 16 2878; GCN-NOHSA-SI-NEXT: s_lshr_b32 s33, s15, 16 2879; GCN-NOHSA-SI-NEXT: s_lshr_b32 s34, s14, 16 2880; GCN-NOHSA-SI-NEXT: s_and_b32 s35, s1, 0xffff 2881; GCN-NOHSA-SI-NEXT: s_and_b32 s52, s0, 0xffff 2882; GCN-NOHSA-SI-NEXT: s_and_b32 s53, s3, 0xffff 2883; GCN-NOHSA-SI-NEXT: s_and_b32 s54, s2, 0xffff 2884; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, 0xffff 2885; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, 0xffff 2886; GCN-NOHSA-SI-NEXT: s_and_b32 s7, s7, 0xffff 2887; GCN-NOHSA-SI-NEXT: s_and_b32 s6, s6, 0xffff 2888; GCN-NOHSA-SI-NEXT: s_and_b32 s9, s9, 0xffff 2889; GCN-NOHSA-SI-NEXT: s_and_b32 s8, s8, 0xffff 2890; GCN-NOHSA-SI-NEXT: s_and_b32 s11, s11, 0xffff 2891; GCN-NOHSA-SI-NEXT: s_and_b32 s10, s10, 0xffff 2892; GCN-NOHSA-SI-NEXT: s_and_b32 s13, s13, 0xffff 2893; GCN-NOHSA-SI-NEXT: s_and_b32 s12, s12, 0xffff 2894; GCN-NOHSA-SI-NEXT: s_and_b32 s15, s15, 0xffff 2895; GCN-NOHSA-SI-NEXT: s_and_b32 s14, s14, 0xffff 2896; GCN-NOHSA-SI-NEXT: s_lshr_b32 s55, s37, 16 2897; GCN-NOHSA-SI-NEXT: s_lshr_b32 s56, s36, 16 2898; GCN-NOHSA-SI-NEXT: s_lshr_b32 s57, s39, 16 2899; GCN-NOHSA-SI-NEXT: s_lshr_b32 s58, s38, 16 2900; GCN-NOHSA-SI-NEXT: s_lshr_b32 s59, s41, 16 2901; GCN-NOHSA-SI-NEXT: s_lshr_b32 s60, s40, 16 2902; GCN-NOHSA-SI-NEXT: s_lshr_b32 s61, s43, 16 2903; GCN-NOHSA-SI-NEXT: s_lshr_b32 s62, s42, 16 2904; GCN-NOHSA-SI-NEXT: s_lshr_b32 s63, s45, 16 2905; GCN-NOHSA-SI-NEXT: s_lshr_b32 s64, s44, 16 2906; GCN-NOHSA-SI-NEXT: s_lshr_b32 s65, s47, 16 2907; GCN-NOHSA-SI-NEXT: s_lshr_b32 s66, s46, 16 2908; GCN-NOHSA-SI-NEXT: s_lshr_b32 s67, s49, 16 2909; 
GCN-NOHSA-SI-NEXT: s_lshr_b32 s68, s48, 16 2910; GCN-NOHSA-SI-NEXT: s_lshr_b32 s69, s51, 16 2911; GCN-NOHSA-SI-NEXT: s_lshr_b32 s70, s50, 16 2912; GCN-NOHSA-SI-NEXT: s_and_b32 s37, s37, 0xffff 2913; GCN-NOHSA-SI-NEXT: s_and_b32 s36, s36, 0xffff 2914; GCN-NOHSA-SI-NEXT: s_and_b32 s39, s39, 0xffff 2915; GCN-NOHSA-SI-NEXT: s_and_b32 s38, s38, 0xffff 2916; GCN-NOHSA-SI-NEXT: s_and_b32 s40, s40, 0xffff 2917; GCN-NOHSA-SI-NEXT: s_and_b32 s43, s43, 0xffff 2918; GCN-NOHSA-SI-NEXT: s_and_b32 s42, s42, 0xffff 2919; GCN-NOHSA-SI-NEXT: s_and_b32 s45, s45, 0xffff 2920; GCN-NOHSA-SI-NEXT: s_and_b32 s44, s44, 0xffff 2921; GCN-NOHSA-SI-NEXT: s_and_b32 s47, s47, 0xffff 2922; GCN-NOHSA-SI-NEXT: s_and_b32 s46, s46, 0xffff 2923; GCN-NOHSA-SI-NEXT: s_and_b32 s49, s49, 0xffff 2924; GCN-NOHSA-SI-NEXT: s_and_b32 s48, s48, 0xffff 2925; GCN-NOHSA-SI-NEXT: s_and_b32 s51, s51, 0xffff 2926; GCN-NOHSA-SI-NEXT: s_and_b32 s50, s50, 0xffff 2927; GCN-NOHSA-SI-NEXT: s_and_b32 s41, s41, 0xffff 2928; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s16 2929; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s17 2930; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 2931; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 2932; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s50 2933; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s70 2934; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s51 2935; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s69 2936; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s48 2937; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s68 2938; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s49 2939; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s67 2940; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s46 2941; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s66 2942; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s47 2943; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s65 2944; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s44 2945; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s64 2946; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s45 2947; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s63 2948; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v16, s42 2949; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, 
s62 2950; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v18, s43 2951; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v20, s40 2952; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v19, s61 2953; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v21, s60 2954; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v22, s41 2955; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v23, s59 2956; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240 2957; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224 2958; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208 2959; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192 2960; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176 2961; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:160 2962; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(5) 2963; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s38 2964; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s58 2965; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s39 2966; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s57 2967; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144 2968; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2969; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s36 2970; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s56 2971; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s37 2972; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s55 2973; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128 2974; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2975; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s14 2976; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s34 2977; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s15 2978; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s33 2979; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 2980; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2981; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s12 2982; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s31 2983; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s13 2984; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s30 2985; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 
v[0:3], off, s[0:3], 0 offset:96 2986; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2987; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10 2988; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s29 2989; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s11 2990; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s28 2991; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 2992; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2993; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s8 2994; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s27 2995; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s9 2996; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s26 2997; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 2998; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2999; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 3000; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s25 3001; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7 3002; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s24 3003; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 3004; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3005; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 3006; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s23 3007; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 3008; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s22 3009; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 3010; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3011; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s54 3012; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s21 3013; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s53 3014; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s20 3015; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 3016; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3017; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s52 3018; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s19 3019; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s35 3020; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s18 3021; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 3022; GCN-NOHSA-SI-NEXT: s_endpgm 3023; 3024; GCN-HSA-LABEL: constant_zextload_v64i16_to_v64i32: 3025; 
GCN-HSA: ; %bb.0: 3026; GCN-HSA-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x0 3027; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 3028; GCN-HSA-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 3029; GCN-HSA-NEXT: s_load_dwordx16 s[36:51], s[18:19], 0x10 3030; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 3031; GCN-HSA-NEXT: s_lshr_b32 s20, s1, 16 3032; GCN-HSA-NEXT: s_lshr_b32 s21, s0, 16 3033; GCN-HSA-NEXT: s_lshr_b32 s22, s3, 16 3034; GCN-HSA-NEXT: s_lshr_b32 s23, s2, 16 3035; GCN-HSA-NEXT: s_lshr_b32 s24, s5, 16 3036; GCN-HSA-NEXT: s_lshr_b32 s25, s4, 16 3037; GCN-HSA-NEXT: s_lshr_b32 s26, s7, 16 3038; GCN-HSA-NEXT: s_lshr_b32 s27, s6, 16 3039; GCN-HSA-NEXT: s_lshr_b32 s28, s9, 16 3040; GCN-HSA-NEXT: s_lshr_b32 s29, s8, 16 3041; GCN-HSA-NEXT: s_lshr_b32 s30, s11, 16 3042; GCN-HSA-NEXT: s_lshr_b32 s31, s10, 16 3043; GCN-HSA-NEXT: s_lshr_b32 s33, s13, 16 3044; GCN-HSA-NEXT: s_lshr_b32 s34, s12, 16 3045; GCN-HSA-NEXT: s_lshr_b32 s35, s15, 16 3046; GCN-HSA-NEXT: s_lshr_b32 s52, s14, 16 3047; GCN-HSA-NEXT: s_and_b32 s1, s1, 0xffff 3048; GCN-HSA-NEXT: s_and_b32 s0, s0, 0xffff 3049; GCN-HSA-NEXT: s_and_b32 s3, s3, 0xffff 3050; GCN-HSA-NEXT: s_and_b32 s2, s2, 0xffff 3051; GCN-HSA-NEXT: s_and_b32 s5, s5, 0xffff 3052; GCN-HSA-NEXT: s_and_b32 s4, s4, 0xffff 3053; GCN-HSA-NEXT: s_and_b32 s53, s7, 0xffff 3054; GCN-HSA-NEXT: s_and_b32 s54, s6, 0xffff 3055; GCN-HSA-NEXT: s_and_b32 s9, s9, 0xffff 3056; GCN-HSA-NEXT: s_and_b32 s8, s8, 0xffff 3057; GCN-HSA-NEXT: s_and_b32 s11, s11, 0xffff 3058; GCN-HSA-NEXT: s_and_b32 s10, s10, 0xffff 3059; GCN-HSA-NEXT: s_and_b32 s13, s13, 0xffff 3060; GCN-HSA-NEXT: s_and_b32 s12, s12, 0xffff 3061; GCN-HSA-NEXT: s_and_b32 s15, s15, 0xffff 3062; GCN-HSA-NEXT: s_and_b32 s14, s14, 0xffff 3063; GCN-HSA-NEXT: s_lshr_b32 s18, s37, 16 3064; GCN-HSA-NEXT: s_lshr_b32 s19, s36, 16 3065; GCN-HSA-NEXT: s_lshr_b32 s55, s39, 16 3066; GCN-HSA-NEXT: s_lshr_b32 s56, s38, 16 3067; GCN-HSA-NEXT: s_lshr_b32 s57, s41, 16 3068; GCN-HSA-NEXT: s_lshr_b32 s58, s40, 16 3069; GCN-HSA-NEXT: s_lshr_b32 
s59, s43, 16 3070; GCN-HSA-NEXT: s_lshr_b32 s60, s42, 16 3071; GCN-HSA-NEXT: s_lshr_b32 s61, s45, 16 3072; GCN-HSA-NEXT: s_lshr_b32 s62, s44, 16 3073; GCN-HSA-NEXT: s_lshr_b32 s63, s47, 16 3074; GCN-HSA-NEXT: s_lshr_b32 s64, s46, 16 3075; GCN-HSA-NEXT: s_lshr_b32 s65, s49, 16 3076; GCN-HSA-NEXT: s_lshr_b32 s66, s48, 16 3077; GCN-HSA-NEXT: s_lshr_b32 s67, s51, 16 3078; GCN-HSA-NEXT: s_lshr_b32 s68, s50, 16 3079; GCN-HSA-NEXT: s_and_b32 s37, s37, 0xffff 3080; GCN-HSA-NEXT: s_and_b32 s36, s36, 0xffff 3081; GCN-HSA-NEXT: s_and_b32 s39, s39, 0xffff 3082; GCN-HSA-NEXT: s_and_b32 s38, s38, 0xffff 3083; GCN-HSA-NEXT: s_and_b32 s41, s41, 0xffff 3084; GCN-HSA-NEXT: s_and_b32 s40, s40, 0xffff 3085; GCN-HSA-NEXT: s_and_b32 s43, s43, 0xffff 3086; GCN-HSA-NEXT: s_and_b32 s42, s42, 0xffff 3087; GCN-HSA-NEXT: s_and_b32 s45, s45, 0xffff 3088; GCN-HSA-NEXT: s_and_b32 s44, s44, 0xffff 3089; GCN-HSA-NEXT: s_and_b32 s47, s47, 0xffff 3090; GCN-HSA-NEXT: s_and_b32 s46, s46, 0xffff 3091; GCN-HSA-NEXT: s_and_b32 s49, s49, 0xffff 3092; GCN-HSA-NEXT: s_and_b32 s48, s48, 0xffff 3093; GCN-HSA-NEXT: s_and_b32 s51, s51, 0xffff 3094; GCN-HSA-NEXT: s_and_b32 s50, s50, 0xffff 3095; GCN-HSA-NEXT: s_add_u32 s6, s16, 0xf0 3096; GCN-HSA-NEXT: s_addc_u32 s7, s17, 0 3097; GCN-HSA-NEXT: v_mov_b32_e32 v22, s7 3098; GCN-HSA-NEXT: v_mov_b32_e32 v21, s6 3099; GCN-HSA-NEXT: s_add_u32 s6, s16, 0xe0 3100; GCN-HSA-NEXT: s_addc_u32 s7, s17, 0 3101; GCN-HSA-NEXT: v_mov_b32_e32 v25, s7 3102; GCN-HSA-NEXT: v_mov_b32_e32 v24, s6 3103; GCN-HSA-NEXT: s_add_u32 s6, s16, 0xd0 3104; GCN-HSA-NEXT: s_addc_u32 s7, s17, 0 3105; GCN-HSA-NEXT: v_mov_b32_e32 v27, s7 3106; GCN-HSA-NEXT: v_mov_b32_e32 v26, s6 3107; GCN-HSA-NEXT: s_add_u32 s6, s16, 0xc0 3108; GCN-HSA-NEXT: s_addc_u32 s7, s17, 0 3109; GCN-HSA-NEXT: v_mov_b32_e32 v29, s7 3110; GCN-HSA-NEXT: v_mov_b32_e32 v28, s6 3111; GCN-HSA-NEXT: s_add_u32 s6, s16, 0xb0 3112; GCN-HSA-NEXT: s_addc_u32 s7, s17, 0 3113; GCN-HSA-NEXT: v_mov_b32_e32 v31, s7 3114; GCN-HSA-NEXT: 
v_mov_b32_e32 v30, s6 3115; GCN-HSA-NEXT: s_add_u32 s6, s16, 0xa0 3116; GCN-HSA-NEXT: s_addc_u32 s7, s17, 0 3117; GCN-HSA-NEXT: v_mov_b32_e32 v33, s7 3118; GCN-HSA-NEXT: v_mov_b32_e32 v32, s6 3119; GCN-HSA-NEXT: s_add_u32 s6, s16, 0x90 3120; GCN-HSA-NEXT: v_mov_b32_e32 v4, s48 3121; GCN-HSA-NEXT: v_mov_b32_e32 v5, s66 3122; GCN-HSA-NEXT: v_mov_b32_e32 v6, s49 3123; GCN-HSA-NEXT: v_mov_b32_e32 v7, s65 3124; GCN-HSA-NEXT: s_addc_u32 s7, s17, 0 3125; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[4:7] 3126; GCN-HSA-NEXT: v_mov_b32_e32 v25, s7 3127; GCN-HSA-NEXT: v_mov_b32_e32 v24, s6 3128; GCN-HSA-NEXT: s_add_u32 s6, s16, 0x80 3129; GCN-HSA-NEXT: s_addc_u32 s7, s17, 0 3130; GCN-HSA-NEXT: v_mov_b32_e32 v35, s7 3131; GCN-HSA-NEXT: v_mov_b32_e32 v34, s6 3132; GCN-HSA-NEXT: s_add_u32 s6, s16, 0x70 3133; GCN-HSA-NEXT: v_mov_b32_e32 v16, s42 3134; GCN-HSA-NEXT: v_mov_b32_e32 v17, s60 3135; GCN-HSA-NEXT: v_mov_b32_e32 v18, s43 3136; GCN-HSA-NEXT: v_mov_b32_e32 v19, s59 3137; GCN-HSA-NEXT: s_addc_u32 s7, s17, 0 3138; GCN-HSA-NEXT: flat_store_dwordx4 v[30:31], v[16:19] 3139; GCN-HSA-NEXT: v_mov_b32_e32 v0, s50 3140; GCN-HSA-NEXT: v_mov_b32_e32 v17, s7 3141; GCN-HSA-NEXT: v_mov_b32_e32 v16, s6 3142; GCN-HSA-NEXT: s_add_u32 s6, s16, 0x60 3143; GCN-HSA-NEXT: s_addc_u32 s7, s17, 0 3144; GCN-HSA-NEXT: v_mov_b32_e32 v1, s68 3145; GCN-HSA-NEXT: v_mov_b32_e32 v2, s51 3146; GCN-HSA-NEXT: v_mov_b32_e32 v3, s67 3147; GCN-HSA-NEXT: v_mov_b32_e32 v19, s7 3148; GCN-HSA-NEXT: v_mov_b32_e32 v8, s46 3149; GCN-HSA-NEXT: v_mov_b32_e32 v9, s64 3150; GCN-HSA-NEXT: v_mov_b32_e32 v10, s47 3151; GCN-HSA-NEXT: v_mov_b32_e32 v11, s63 3152; GCN-HSA-NEXT: v_mov_b32_e32 v12, s44 3153; GCN-HSA-NEXT: v_mov_b32_e32 v13, s62 3154; GCN-HSA-NEXT: v_mov_b32_e32 v14, s45 3155; GCN-HSA-NEXT: v_mov_b32_e32 v15, s61 3156; GCN-HSA-NEXT: v_mov_b32_e32 v20, s40 3157; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[0:3] 3158; GCN-HSA-NEXT: v_mov_b32_e32 v21, s58 3159; GCN-HSA-NEXT: v_mov_b32_e32 v0, s38 3160; GCN-HSA-NEXT: 
v_mov_b32_e32 v22, s41 3161; GCN-HSA-NEXT: v_mov_b32_e32 v23, s57 3162; GCN-HSA-NEXT: v_mov_b32_e32 v1, s56 3163; GCN-HSA-NEXT: v_mov_b32_e32 v2, s39 3164; GCN-HSA-NEXT: v_mov_b32_e32 v4, s36 3165; GCN-HSA-NEXT: v_mov_b32_e32 v3, s55 3166; GCN-HSA-NEXT: v_mov_b32_e32 v5, s19 3167; GCN-HSA-NEXT: v_mov_b32_e32 v18, s6 3168; GCN-HSA-NEXT: s_add_u32 s6, s16, 0x50 3169; GCN-HSA-NEXT: flat_store_dwordx4 v[26:27], v[8:11] 3170; GCN-HSA-NEXT: v_mov_b32_e32 v6, s37 3171; GCN-HSA-NEXT: v_mov_b32_e32 v8, s14 3172; GCN-HSA-NEXT: flat_store_dwordx4 v[28:29], v[12:15] 3173; GCN-HSA-NEXT: v_mov_b32_e32 v7, s18 3174; GCN-HSA-NEXT: v_mov_b32_e32 v12, s12 3175; GCN-HSA-NEXT: v_mov_b32_e32 v9, s52 3176; GCN-HSA-NEXT: v_mov_b32_e32 v10, s15 3177; GCN-HSA-NEXT: v_mov_b32_e32 v11, s35 3178; GCN-HSA-NEXT: v_mov_b32_e32 v13, s34 3179; GCN-HSA-NEXT: flat_store_dwordx4 v[32:33], v[20:23] 3180; GCN-HSA-NEXT: v_mov_b32_e32 v14, s13 3181; GCN-HSA-NEXT: v_mov_b32_e32 v15, s33 3182; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[0:3] 3183; GCN-HSA-NEXT: flat_store_dwordx4 v[34:35], v[4:7] 3184; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[8:11] 3185; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[12:15] 3186; GCN-HSA-NEXT: s_addc_u32 s7, s17, 0 3187; GCN-HSA-NEXT: v_mov_b32_e32 v4, s6 3188; GCN-HSA-NEXT: v_mov_b32_e32 v0, s10 3189; GCN-HSA-NEXT: v_mov_b32_e32 v1, s31 3190; GCN-HSA-NEXT: v_mov_b32_e32 v2, s11 3191; GCN-HSA-NEXT: v_mov_b32_e32 v3, s30 3192; GCN-HSA-NEXT: v_mov_b32_e32 v5, s7 3193; GCN-HSA-NEXT: s_add_u32 s6, s16, 64 3194; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3195; GCN-HSA-NEXT: s_addc_u32 s7, s17, 0 3196; GCN-HSA-NEXT: v_mov_b32_e32 v4, s6 3197; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 3198; GCN-HSA-NEXT: v_mov_b32_e32 v1, s29 3199; GCN-HSA-NEXT: v_mov_b32_e32 v2, s9 3200; GCN-HSA-NEXT: v_mov_b32_e32 v3, s28 3201; GCN-HSA-NEXT: v_mov_b32_e32 v5, s7 3202; GCN-HSA-NEXT: s_add_u32 s6, s16, 48 3203; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3204; GCN-HSA-NEXT: s_addc_u32 s7, 
s17, 0 3205; GCN-HSA-NEXT: v_mov_b32_e32 v4, s6 3206; GCN-HSA-NEXT: v_mov_b32_e32 v0, s54 3207; GCN-HSA-NEXT: v_mov_b32_e32 v1, s27 3208; GCN-HSA-NEXT: v_mov_b32_e32 v2, s53 3209; GCN-HSA-NEXT: v_mov_b32_e32 v3, s26 3210; GCN-HSA-NEXT: v_mov_b32_e32 v5, s7 3211; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3212; GCN-HSA-NEXT: s_nop 0 3213; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 3214; GCN-HSA-NEXT: s_add_u32 s4, s16, 32 3215; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5 3216; GCN-HSA-NEXT: s_addc_u32 s5, s17, 0 3217; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 3218; GCN-HSA-NEXT: v_mov_b32_e32 v1, s25 3219; GCN-HSA-NEXT: v_mov_b32_e32 v3, s24 3220; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 3221; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3222; GCN-HSA-NEXT: s_nop 0 3223; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 3224; GCN-HSA-NEXT: s_add_u32 s2, s16, 16 3225; GCN-HSA-NEXT: v_mov_b32_e32 v2, s3 3226; GCN-HSA-NEXT: s_addc_u32 s3, s17, 0 3227; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 3228; GCN-HSA-NEXT: v_mov_b32_e32 v1, s23 3229; GCN-HSA-NEXT: v_mov_b32_e32 v3, s22 3230; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 3231; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3232; GCN-HSA-NEXT: v_mov_b32_e32 v4, s16 3233; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 3234; GCN-HSA-NEXT: v_mov_b32_e32 v1, s21 3235; GCN-HSA-NEXT: v_mov_b32_e32 v2, s1 3236; GCN-HSA-NEXT: v_mov_b32_e32 v3, s20 3237; GCN-HSA-NEXT: v_mov_b32_e32 v5, s17 3238; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3239; GCN-HSA-NEXT: s_endpgm 3240; 3241; GCN-NOHSA-VI-LABEL: constant_zextload_v64i16_to_v64i32: 3242; GCN-NOHSA-VI: ; %bb.0: 3243; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[36:39], s[0:1], 0x24 3244; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 3245; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[16:31], s[38:39], 0x40 3246; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[0:15], s[38:39], 0x0 3247; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 3248; GCN-NOHSA-VI-NEXT: s_lshr_b32 s69, s31, 16 3249; GCN-NOHSA-VI-NEXT: s_lshr_b32 s70, s30, 16 3250; GCN-NOHSA-VI-NEXT: s_and_b32 s31, s31, 
0xffff 3251; GCN-NOHSA-VI-NEXT: s_and_b32 s30, s30, 0xffff 3252; GCN-NOHSA-VI-NEXT: s_lshr_b32 s33, s1, 16 3253; GCN-NOHSA-VI-NEXT: s_lshr_b32 s34, s0, 16 3254; GCN-NOHSA-VI-NEXT: s_lshr_b32 s35, s3, 16 3255; GCN-NOHSA-VI-NEXT: s_lshr_b32 s40, s2, 16 3256; GCN-NOHSA-VI-NEXT: s_and_b32 s60, s1, 0xffff 3257; GCN-NOHSA-VI-NEXT: s_and_b32 s61, s0, 0xffff 3258; GCN-NOHSA-VI-NEXT: s_and_b32 s62, s3, 0xffff 3259; GCN-NOHSA-VI-NEXT: s_and_b32 s63, s2, 0xffff 3260; GCN-NOHSA-VI-NEXT: s_lshr_b32 s67, s29, 16 3261; GCN-NOHSA-VI-NEXT: s_lshr_b32 s68, s28, 16 3262; GCN-NOHSA-VI-NEXT: s_and_b32 s29, s29, 0xffff 3263; GCN-NOHSA-VI-NEXT: s_and_b32 s28, s28, 0xffff 3264; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 3265; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 3266; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s36 3267; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s37 3268; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s30 3269; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s70 3270; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s31 3271; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s69 3272; GCN-NOHSA-VI-NEXT: s_lshr_b32 s65, s27, 16 3273; GCN-NOHSA-VI-NEXT: s_lshr_b32 s66, s26, 16 3274; GCN-NOHSA-VI-NEXT: s_and_b32 s27, s27, 0xffff 3275; GCN-NOHSA-VI-NEXT: s_and_b32 s26, s26, 0xffff 3276; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240 3277; GCN-NOHSA-VI-NEXT: s_lshr_b32 s59, s25, 16 3278; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s28 3279; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s68 3280; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s29 3281; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s67 3282; GCN-NOHSA-VI-NEXT: s_lshr_b32 s64, s24, 16 3283; GCN-NOHSA-VI-NEXT: s_and_b32 s25, s25, 0xffff 3284; GCN-NOHSA-VI-NEXT: s_and_b32 s24, s24, 0xffff 3285; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224 3286; GCN-NOHSA-VI-NEXT: s_lshr_b32 s57, s23, 16 3287; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s26 3288; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s66 3289; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s27 3290; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 
v3, s65 3291; GCN-NOHSA-VI-NEXT: s_lshr_b32 s58, s22, 16 3292; GCN-NOHSA-VI-NEXT: s_and_b32 s23, s23, 0xffff 3293; GCN-NOHSA-VI-NEXT: s_and_b32 s22, s22, 0xffff 3294; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208 3295; GCN-NOHSA-VI-NEXT: s_lshr_b32 s55, s21, 16 3296; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s24 3297; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s64 3298; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s25 3299; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s59 3300; GCN-NOHSA-VI-NEXT: s_lshr_b32 s56, s20, 16 3301; GCN-NOHSA-VI-NEXT: s_and_b32 s21, s21, 0xffff 3302; GCN-NOHSA-VI-NEXT: s_and_b32 s20, s20, 0xffff 3303; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192 3304; GCN-NOHSA-VI-NEXT: s_lshr_b32 s53, s19, 16 3305; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s22 3306; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s58 3307; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s23 3308; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s57 3309; GCN-NOHSA-VI-NEXT: s_lshr_b32 s54, s18, 16 3310; GCN-NOHSA-VI-NEXT: s_and_b32 s19, s19, 0xffff 3311; GCN-NOHSA-VI-NEXT: s_and_b32 s18, s18, 0xffff 3312; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176 3313; GCN-NOHSA-VI-NEXT: s_lshr_b32 s51, s17, 16 3314; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s20 3315; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s56 3316; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s21 3317; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s55 3318; GCN-NOHSA-VI-NEXT: s_lshr_b32 s52, s16, 16 3319; GCN-NOHSA-VI-NEXT: s_and_b32 s17, s17, 0xffff 3320; GCN-NOHSA-VI-NEXT: s_and_b32 s16, s16, 0xffff 3321; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160 3322; GCN-NOHSA-VI-NEXT: s_lshr_b32 s49, s15, 16 3323; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s18 3324; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s54 3325; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s19 3326; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s53 3327; GCN-NOHSA-VI-NEXT: s_lshr_b32 s50, s14, 16 3328; GCN-NOHSA-VI-NEXT: s_and_b32 s15, s15, 0xffff 3329; 
GCN-NOHSA-VI-NEXT: s_and_b32 s14, s14, 0xffff 3330; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144 3331; GCN-NOHSA-VI-NEXT: s_lshr_b32 s38, s13, 16 3332; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s16 3333; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s52 3334; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s17 3335; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s51 3336; GCN-NOHSA-VI-NEXT: s_lshr_b32 s39, s12, 16 3337; GCN-NOHSA-VI-NEXT: s_and_b32 s13, s13, 0xffff 3338; GCN-NOHSA-VI-NEXT: s_and_b32 s12, s12, 0xffff 3339; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128 3340; GCN-NOHSA-VI-NEXT: s_lshr_b32 s47, s11, 16 3341; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s14 3342; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s50 3343; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s15 3344; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s49 3345; GCN-NOHSA-VI-NEXT: s_lshr_b32 s48, s10, 16 3346; GCN-NOHSA-VI-NEXT: s_and_b32 s11, s11, 0xffff 3347; GCN-NOHSA-VI-NEXT: s_and_b32 s10, s10, 0xffff 3348; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 3349; GCN-NOHSA-VI-NEXT: s_lshr_b32 s45, s9, 16 3350; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s12 3351; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s39 3352; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s13 3353; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s38 3354; GCN-NOHSA-VI-NEXT: s_lshr_b32 s46, s8, 16 3355; GCN-NOHSA-VI-NEXT: s_and_b32 s9, s9, 0xffff 3356; GCN-NOHSA-VI-NEXT: s_and_b32 s8, s8, 0xffff 3357; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 3358; GCN-NOHSA-VI-NEXT: s_lshr_b32 s43, s7, 16 3359; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10 3360; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s48 3361; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s11 3362; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s47 3363; GCN-NOHSA-VI-NEXT: s_lshr_b32 s44, s6, 16 3364; GCN-NOHSA-VI-NEXT: s_and_b32 s7, s7, 0xffff 3365; GCN-NOHSA-VI-NEXT: s_and_b32 s6, s6, 0xffff 3366; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 3367; 
GCN-NOHSA-VI-NEXT: s_lshr_b32 s41, s5, 16 3368; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8 3369; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s46 3370; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s9 3371; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s45 3372; GCN-NOHSA-VI-NEXT: s_lshr_b32 s42, s4, 16 3373; GCN-NOHSA-VI-NEXT: s_and_b32 s5, s5, 0xffff 3374; GCN-NOHSA-VI-NEXT: s_and_b32 s4, s4, 0xffff 3375; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 3376; GCN-NOHSA-VI-NEXT: s_nop 0 3377; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 3378; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s44 3379; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7 3380; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s43 3381; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 3382; GCN-NOHSA-VI-NEXT: s_nop 0 3383; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 3384; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s42 3385; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 3386; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s41 3387; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 3388; GCN-NOHSA-VI-NEXT: s_nop 0 3389; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s63 3390; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s40 3391; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s62 3392; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s35 3393; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 3394; GCN-NOHSA-VI-NEXT: s_nop 0 3395; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s61 3396; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s34 3397; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s60 3398; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s33 3399; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 3400; GCN-NOHSA-VI-NEXT: s_endpgm 3401; 3402; EG-LABEL: constant_zextload_v64i16_to_v64i32: 3403; EG: ; %bb.0: 3404; EG-NEXT: ALU 0, @38, KC0[CB0:0-32], KC1[] 3405; EG-NEXT: TEX 3 @22 3406; EG-NEXT: ALU 55, @39, KC0[CB0:0-32], KC1[] 3407; EG-NEXT: TEX 3 @30 3408; EG-NEXT: ALU 87, @95, KC0[CB0:0-32], KC1[] 3409; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW 
T65.XYZW, T66.X, 0 3410; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T63.XYZW, T49.X, 0 3411; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T62.XYZW, T64.X, 0 3412; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T60.XYZW, T50.X, 0 3413; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T59.XYZW, T61.X, 0 3414; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T57.XYZW, T51.X, 0 3415; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T56.XYZW, T58.X, 0 3416; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T54.XYZW, T52.X, 0 3417; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T53.XYZW, T55.X, 0 3418; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T39.X, 0 3419; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T48.X, 0 3420; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T40.X, 0 3421; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T46.X, 0 3422; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T41.X, 0 3423; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T43.X, 0 3424; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T36.X, 1 3425; EG-NEXT: CF_END 3426; EG-NEXT: Fetch clause starting at 22: 3427; EG-NEXT: VTX_READ_128 T36.XYZW, T35.X, 0, #1 3428; EG-NEXT: VTX_READ_128 T39.XYZW, T35.X, 48, #1 3429; EG-NEXT: VTX_READ_128 T40.XYZW, T35.X, 32, #1 3430; EG-NEXT: VTX_READ_128 T41.XYZW, T35.X, 16, #1 3431; EG-NEXT: Fetch clause starting at 30: 3432; EG-NEXT: VTX_READ_128 T49.XYZW, T35.X, 112, #1 3433; EG-NEXT: VTX_READ_128 T50.XYZW, T35.X, 96, #1 3434; EG-NEXT: VTX_READ_128 T51.XYZW, T35.X, 80, #1 3435; EG-NEXT: VTX_READ_128 T52.XYZW, T35.X, 64, #1 3436; EG-NEXT: ALU clause starting at 38: 3437; EG-NEXT: MOV * T35.X, KC0[2].Z, 3438; EG-NEXT: ALU clause starting at 39: 3439; EG-NEXT: LSHR * T37.W, T36.Y, literal.x, 3440; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3441; EG-NEXT: AND_INT * T37.Z, T36.Y, literal.x, 3442; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3443; EG-NEXT: LSHR T37.Y, T36.X, literal.x, 3444; EG-NEXT: LSHR * T38.W, T36.W, literal.x, 3445; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3446; EG-NEXT: AND_INT T37.X, T36.X, literal.x, 3447; EG-NEXT: 
AND_INT T38.Z, T36.W, literal.x, 3448; EG-NEXT: LSHR * T36.X, KC0[2].Y, literal.y, 3449; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 3450; EG-NEXT: LSHR T38.Y, T36.Z, literal.x, 3451; EG-NEXT: LSHR * T42.W, T41.Y, literal.x, 3452; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3453; EG-NEXT: AND_INT T38.X, T36.Z, literal.x, 3454; EG-NEXT: AND_INT T42.Z, T41.Y, literal.x, 3455; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3456; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 3457; EG-NEXT: LSHR T43.X, PV.W, literal.x, 3458; EG-NEXT: LSHR T42.Y, T41.X, literal.y, 3459; EG-NEXT: LSHR T44.W, T41.W, literal.y, 3460; EG-NEXT: AND_INT * T42.X, T41.X, literal.z, 3461; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3462; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3463; EG-NEXT: AND_INT T44.Z, T41.W, literal.x, 3464; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3465; EG-NEXT: 65535(9.183409e-41), 32(4.484155e-44) 3466; EG-NEXT: LSHR T41.X, PV.W, literal.x, 3467; EG-NEXT: LSHR T44.Y, T41.Z, literal.y, 3468; EG-NEXT: LSHR T45.W, T40.Y, literal.y, 3469; EG-NEXT: AND_INT * T44.X, T41.Z, literal.z, 3470; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3471; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3472; EG-NEXT: AND_INT T45.Z, T40.Y, literal.x, 3473; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3474; EG-NEXT: 65535(9.183409e-41), 48(6.726233e-44) 3475; EG-NEXT: LSHR T46.X, PV.W, literal.x, 3476; EG-NEXT: LSHR T45.Y, T40.X, literal.y, 3477; EG-NEXT: LSHR T47.W, T40.W, literal.y, 3478; EG-NEXT: AND_INT * T45.X, T40.X, literal.z, 3479; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3480; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3481; EG-NEXT: AND_INT T47.Z, T40.W, literal.x, 3482; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3483; EG-NEXT: 65535(9.183409e-41), 64(8.968310e-44) 3484; EG-NEXT: LSHR T40.X, PV.W, literal.x, 3485; EG-NEXT: LSHR T47.Y, T40.Z, literal.y, 3486; EG-NEXT: AND_INT * T47.X, T40.Z, literal.z, 3487; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3488; EG-NEXT: 
65535(9.183409e-41), 0(0.000000e+00) 3489; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.x, 3490; EG-NEXT: LSHR * T35.W, T39.Y, literal.y, 3491; EG-NEXT: 80(1.121039e-43), 16(2.242078e-44) 3492; EG-NEXT: LSHR T48.X, PV.W, literal.x, 3493; EG-NEXT: AND_INT * T35.Z, T39.Y, literal.y, 3494; EG-NEXT: 2(2.802597e-45), 65535(9.183409e-41) 3495; EG-NEXT: ALU clause starting at 95: 3496; EG-NEXT: LSHR T35.Y, T39.X, literal.x, 3497; EG-NEXT: LSHR * T53.W, T39.W, literal.x, 3498; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3499; EG-NEXT: AND_INT T35.X, T39.X, literal.x, 3500; EG-NEXT: AND_INT T53.Z, T39.W, literal.x, 3501; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3502; EG-NEXT: 65535(9.183409e-41), 96(1.345247e-43) 3503; EG-NEXT: LSHR T39.X, PV.W, literal.x, 3504; EG-NEXT: LSHR T53.Y, T39.Z, literal.y, 3505; EG-NEXT: LSHR T54.W, T52.Y, literal.y, 3506; EG-NEXT: AND_INT * T53.X, T39.Z, literal.z, 3507; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3508; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3509; EG-NEXT: AND_INT T54.Z, T52.Y, literal.x, 3510; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3511; EG-NEXT: 65535(9.183409e-41), 112(1.569454e-43) 3512; EG-NEXT: LSHR T55.X, PV.W, literal.x, 3513; EG-NEXT: LSHR T54.Y, T52.X, literal.y, 3514; EG-NEXT: LSHR T56.W, T52.W, literal.y, 3515; EG-NEXT: AND_INT * T54.X, T52.X, literal.z, 3516; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3517; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3518; EG-NEXT: AND_INT T56.Z, T52.W, literal.x, 3519; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3520; EG-NEXT: 65535(9.183409e-41), 128(1.793662e-43) 3521; EG-NEXT: LSHR T52.X, PV.W, literal.x, 3522; EG-NEXT: LSHR T56.Y, T52.Z, literal.y, 3523; EG-NEXT: LSHR T57.W, T51.Y, literal.y, 3524; EG-NEXT: AND_INT * T56.X, T52.Z, literal.z, 3525; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3526; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3527; EG-NEXT: AND_INT T57.Z, T51.Y, literal.x, 3528; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3529; EG-NEXT: 
65535(9.183409e-41), 144(2.017870e-43) 3530; EG-NEXT: LSHR T58.X, PV.W, literal.x, 3531; EG-NEXT: LSHR T57.Y, T51.X, literal.y, 3532; EG-NEXT: LSHR T59.W, T51.W, literal.y, 3533; EG-NEXT: AND_INT * T57.X, T51.X, literal.z, 3534; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3535; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3536; EG-NEXT: AND_INT T59.Z, T51.W, literal.x, 3537; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3538; EG-NEXT: 65535(9.183409e-41), 160(2.242078e-43) 3539; EG-NEXT: LSHR T51.X, PV.W, literal.x, 3540; EG-NEXT: LSHR T59.Y, T51.Z, literal.y, 3541; EG-NEXT: LSHR T60.W, T50.Y, literal.y, 3542; EG-NEXT: AND_INT * T59.X, T51.Z, literal.z, 3543; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3544; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3545; EG-NEXT: AND_INT T60.Z, T50.Y, literal.x, 3546; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3547; EG-NEXT: 65535(9.183409e-41), 176(2.466285e-43) 3548; EG-NEXT: LSHR T61.X, PV.W, literal.x, 3549; EG-NEXT: LSHR T60.Y, T50.X, literal.y, 3550; EG-NEXT: LSHR T62.W, T50.W, literal.y, 3551; EG-NEXT: AND_INT * T60.X, T50.X, literal.z, 3552; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3553; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3554; EG-NEXT: AND_INT T62.Z, T50.W, literal.x, 3555; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3556; EG-NEXT: 65535(9.183409e-41), 192(2.690493e-43) 3557; EG-NEXT: LSHR T50.X, PV.W, literal.x, 3558; EG-NEXT: LSHR T62.Y, T50.Z, literal.y, 3559; EG-NEXT: LSHR T63.W, T49.Y, literal.y, 3560; EG-NEXT: AND_INT * T62.X, T50.Z, literal.z, 3561; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3562; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3563; EG-NEXT: AND_INT T63.Z, T49.Y, literal.x, 3564; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3565; EG-NEXT: 65535(9.183409e-41), 208(2.914701e-43) 3566; EG-NEXT: LSHR T64.X, PV.W, literal.x, 3567; EG-NEXT: LSHR T63.Y, T49.X, literal.y, 3568; EG-NEXT: LSHR T65.W, T49.W, literal.y, 3569; EG-NEXT: AND_INT * T63.X, T49.X, literal.z, 3570; EG-NEXT: 
2(2.802597e-45), 16(2.242078e-44) 3571; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3572; EG-NEXT: AND_INT T65.Z, T49.W, literal.x, 3573; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3574; EG-NEXT: 65535(9.183409e-41), 224(3.138909e-43) 3575; EG-NEXT: LSHR T49.X, PV.W, literal.x, 3576; EG-NEXT: LSHR T65.Y, T49.Z, literal.y, 3577; EG-NEXT: AND_INT * T65.X, T49.Z, literal.z, 3578; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3579; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3580; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 3581; EG-NEXT: 240(3.363116e-43), 0(0.000000e+00) 3582; EG-NEXT: LSHR * T66.X, PV.W, literal.x, 3583; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 3584 %load = load <64 x i16>, <64 x i16> addrspace(4)* %in 3585 %ext = zext <64 x i16> %load to <64 x i32> 3586 store <64 x i32> %ext, <64 x i32> addrspace(1)* %out 3587 ret void 3588} 3589 3590define amdgpu_kernel void @constant_sextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 { 3591; GCN-NOHSA-SI-LABEL: constant_sextload_v64i16_to_v64i32: 3592; GCN-NOHSA-SI: ; %bb.0: 3593; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[36:39], s[0:1], 0x9 3594; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 3595; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[0:15], s[38:39], 0x0 3596; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[16:31], s[38:39], 0x10 3597; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 3598; GCN-NOHSA-SI-NEXT: s_ashr_i32 s33, s1, 16 3599; GCN-NOHSA-SI-NEXT: s_ashr_i32 s34, s0, 16 3600; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s35, s1 3601; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s38, s0 3602; GCN-NOHSA-SI-NEXT: s_ashr_i32 s39, s3, 16 3603; GCN-NOHSA-SI-NEXT: s_ashr_i32 s40, s2, 16 3604; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s41, s3 3605; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s42, s2 3606; GCN-NOHSA-SI-NEXT: s_ashr_i32 s43, s5, 16 3607; GCN-NOHSA-SI-NEXT: s_ashr_i32 s44, s4, 16 3608; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s5, s5 3609; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s4, s4 3610; GCN-NOHSA-SI-NEXT: s_ashr_i32 s45, s7, 16 
3611; GCN-NOHSA-SI-NEXT: s_ashr_i32 s46, s6, 16 3612; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s7, s7 3613; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s6, s6 3614; GCN-NOHSA-SI-NEXT: s_ashr_i32 s47, s9, 16 3615; GCN-NOHSA-SI-NEXT: s_ashr_i32 s48, s8, 16 3616; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s9, s9 3617; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s8, s8 3618; GCN-NOHSA-SI-NEXT: s_ashr_i32 s49, s11, 16 3619; GCN-NOHSA-SI-NEXT: s_ashr_i32 s50, s10, 16 3620; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s11, s11 3621; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s10, s10 3622; GCN-NOHSA-SI-NEXT: s_ashr_i32 s51, s13, 16 3623; GCN-NOHSA-SI-NEXT: s_ashr_i32 s52, s12, 16 3624; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s13, s13 3625; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s12, s12 3626; GCN-NOHSA-SI-NEXT: s_ashr_i32 s53, s15, 16 3627; GCN-NOHSA-SI-NEXT: s_ashr_i32 s54, s14, 16 3628; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s15, s15 3629; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s14, s14 3630; GCN-NOHSA-SI-NEXT: s_ashr_i32 s55, s17, 16 3631; GCN-NOHSA-SI-NEXT: s_ashr_i32 s56, s16, 16 3632; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s17, s17 3633; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s16, s16 3634; GCN-NOHSA-SI-NEXT: s_ashr_i32 s57, s19, 16 3635; GCN-NOHSA-SI-NEXT: s_ashr_i32 s58, s18, 16 3636; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s19, s19 3637; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s18, s18 3638; GCN-NOHSA-SI-NEXT: s_ashr_i32 s59, s21, 16 3639; GCN-NOHSA-SI-NEXT: s_ashr_i32 s60, s20, 16 3640; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s21, s21 3641; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s20, s20 3642; GCN-NOHSA-SI-NEXT: s_ashr_i32 s61, s22, 16 3643; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s62, s23 3644; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s22, s22 3645; GCN-NOHSA-SI-NEXT: s_ashr_i32 s63, s25, 16 3646; GCN-NOHSA-SI-NEXT: s_ashr_i32 s64, s24, 16 3647; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s25, s25 3648; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s24, s24 3649; GCN-NOHSA-SI-NEXT: s_ashr_i32 s65, s27, 16 3650; GCN-NOHSA-SI-NEXT: s_ashr_i32 s66, s26, 16 3651; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s27, s27 
3652; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s26, s26 3653; GCN-NOHSA-SI-NEXT: s_ashr_i32 s67, s29, 16 3654; GCN-NOHSA-SI-NEXT: s_ashr_i32 s68, s28, 16 3655; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s29, s29 3656; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s28, s28 3657; GCN-NOHSA-SI-NEXT: s_ashr_i32 s69, s31, 16 3658; GCN-NOHSA-SI-NEXT: s_ashr_i32 s70, s30, 16 3659; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s31, s31 3660; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s30, s30 3661; GCN-NOHSA-SI-NEXT: s_ashr_i32 s23, s23, 16 3662; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s36 3663; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s37 3664; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 3665; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 3666; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s30 3667; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s70 3668; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s31 3669; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s69 3670; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s28 3671; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s68 3672; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s29 3673; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s67 3674; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s26 3675; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s66 3676; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s27 3677; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s65 3678; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s24 3679; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s64 3680; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s25 3681; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s63 3682; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v16, s22 3683; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, s61 3684; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v18, s62 3685; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v20, s20 3686; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v19, s23 3687; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v21, s60 3688; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v22, s21 3689; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v23, s59 3690; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240 3691; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224 3692; GCN-NOHSA-SI-NEXT: 
buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208 3693; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192 3694; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176 3695; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:160 3696; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(5) 3697; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s18 3698; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s58 3699; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s19 3700; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s57 3701; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144 3702; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3703; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s16 3704; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s56 3705; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s17 3706; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s55 3707; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128 3708; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3709; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s14 3710; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s54 3711; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s15 3712; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s53 3713; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 3714; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3715; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s12 3716; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s52 3717; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s13 3718; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s51 3719; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 3720; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3721; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10 3722; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s50 3723; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s11 3724; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s49 3725; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 3726; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3727; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s8 3728; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 
s48 3729; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s9 3730; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s47 3731; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 3732; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3733; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 3734; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s46 3735; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7 3736; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s45 3737; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 3738; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3739; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 3740; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s44 3741; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 3742; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s43 3743; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 3744; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3745; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s42 3746; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s40 3747; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s41 3748; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s39 3749; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 3750; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3751; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s38 3752; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s34 3753; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s35 3754; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s33 3755; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 3756; GCN-NOHSA-SI-NEXT: s_endpgm 3757; 3758; GCN-HSA-LABEL: constant_sextload_v64i16_to_v64i32: 3759; GCN-HSA: ; %bb.0: 3760; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3761; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 3762; GCN-HSA-NEXT: s_load_dwordx16 s[16:31], s[2:3], 0x0 3763; GCN-HSA-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x10 3764; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 3765; GCN-HSA-NEXT: s_ashr_i32 s4, s17, 16 3766; GCN-HSA-NEXT: s_ashr_i32 s5, s16, 16 3767; GCN-HSA-NEXT: s_sext_i32_i16 s6, s17 3768; GCN-HSA-NEXT: s_sext_i32_i16 s7, s16 3769; GCN-HSA-NEXT: s_ashr_i32 s8, s19, 16 
3770; GCN-HSA-NEXT: s_ashr_i32 s9, s18, 16 3771; GCN-HSA-NEXT: s_sext_i32_i16 s10, s19 3772; GCN-HSA-NEXT: s_sext_i32_i16 s11, s18 3773; GCN-HSA-NEXT: s_ashr_i32 s12, s21, 16 3774; GCN-HSA-NEXT: s_ashr_i32 s13, s20, 16 3775; GCN-HSA-NEXT: s_sext_i32_i16 s14, s21 3776; GCN-HSA-NEXT: s_sext_i32_i16 s15, s20 3777; GCN-HSA-NEXT: s_ashr_i32 s16, s23, 16 3778; GCN-HSA-NEXT: s_ashr_i32 s17, s22, 16 3779; GCN-HSA-NEXT: s_sext_i32_i16 s18, s23 3780; GCN-HSA-NEXT: s_sext_i32_i16 s19, s22 3781; GCN-HSA-NEXT: s_ashr_i32 s20, s25, 16 3782; GCN-HSA-NEXT: s_ashr_i32 s21, s24, 16 3783; GCN-HSA-NEXT: s_sext_i32_i16 s22, s25 3784; GCN-HSA-NEXT: s_sext_i32_i16 s23, s24 3785; GCN-HSA-NEXT: s_ashr_i32 s24, s27, 16 3786; GCN-HSA-NEXT: s_ashr_i32 s25, s26, 16 3787; GCN-HSA-NEXT: s_ashr_i32 s33, s29, 16 3788; GCN-HSA-NEXT: s_ashr_i32 s34, s28, 16 3789; GCN-HSA-NEXT: s_ashr_i32 s35, s31, 16 3790; GCN-HSA-NEXT: s_ashr_i32 s52, s30, 16 3791; GCN-HSA-NEXT: s_ashr_i32 s53, s37, 16 3792; GCN-HSA-NEXT: s_ashr_i32 s54, s36, 16 3793; GCN-HSA-NEXT: s_ashr_i32 s55, s39, 16 3794; GCN-HSA-NEXT: s_ashr_i32 s56, s38, 16 3795; GCN-HSA-NEXT: s_ashr_i32 s57, s41, 16 3796; GCN-HSA-NEXT: s_ashr_i32 s58, s40, 16 3797; GCN-HSA-NEXT: s_ashr_i32 s59, s43, 16 3798; GCN-HSA-NEXT: s_ashr_i32 s60, s42, 16 3799; GCN-HSA-NEXT: s_ashr_i32 s61, s45, 16 3800; GCN-HSA-NEXT: s_ashr_i32 s62, s44, 16 3801; GCN-HSA-NEXT: s_ashr_i32 s63, s47, 16 3802; GCN-HSA-NEXT: s_ashr_i32 s64, s46, 16 3803; GCN-HSA-NEXT: s_ashr_i32 s65, s49, 16 3804; GCN-HSA-NEXT: s_ashr_i32 s66, s48, 16 3805; GCN-HSA-NEXT: s_ashr_i32 s67, s51, 16 3806; GCN-HSA-NEXT: s_ashr_i32 s68, s50, 16 3807; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xf0 3808; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3809; GCN-HSA-NEXT: v_mov_b32_e32 v22, s3 3810; GCN-HSA-NEXT: v_mov_b32_e32 v21, s2 3811; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xe0 3812; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3813; GCN-HSA-NEXT: v_mov_b32_e32 v25, s3 3814; GCN-HSA-NEXT: v_mov_b32_e32 v24, s2 3815; GCN-HSA-NEXT: s_add_u32 s2, 
s0, 0xd0 3816; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3817; GCN-HSA-NEXT: v_mov_b32_e32 v27, s3 3818; GCN-HSA-NEXT: v_mov_b32_e32 v26, s2 3819; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xc0 3820; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3821; GCN-HSA-NEXT: v_mov_b32_e32 v29, s3 3822; GCN-HSA-NEXT: v_mov_b32_e32 v28, s2 3823; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xb0 3824; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3825; GCN-HSA-NEXT: v_mov_b32_e32 v31, s3 3826; GCN-HSA-NEXT: v_mov_b32_e32 v30, s2 3827; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xa0 3828; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3829; GCN-HSA-NEXT: v_mov_b32_e32 v33, s3 3830; GCN-HSA-NEXT: s_sext_i32_i16 s49, s49 3831; GCN-HSA-NEXT: s_sext_i32_i16 s48, s48 3832; GCN-HSA-NEXT: v_mov_b32_e32 v32, s2 3833; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x90 3834; GCN-HSA-NEXT: v_mov_b32_e32 v4, s48 3835; GCN-HSA-NEXT: v_mov_b32_e32 v5, s66 3836; GCN-HSA-NEXT: v_mov_b32_e32 v6, s49 3837; GCN-HSA-NEXT: v_mov_b32_e32 v7, s65 3838; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3839; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[4:7] 3840; GCN-HSA-NEXT: v_mov_b32_e32 v25, s3 3841; GCN-HSA-NEXT: v_mov_b32_e32 v24, s2 3842; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x80 3843; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3844; GCN-HSA-NEXT: v_mov_b32_e32 v35, s3 3845; GCN-HSA-NEXT: s_sext_i32_i16 s43, s43 3846; GCN-HSA-NEXT: s_sext_i32_i16 s42, s42 3847; GCN-HSA-NEXT: v_mov_b32_e32 v34, s2 3848; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70 3849; GCN-HSA-NEXT: v_mov_b32_e32 v16, s42 3850; GCN-HSA-NEXT: v_mov_b32_e32 v17, s60 3851; GCN-HSA-NEXT: v_mov_b32_e32 v18, s43 3852; GCN-HSA-NEXT: v_mov_b32_e32 v19, s59 3853; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3854; GCN-HSA-NEXT: flat_store_dwordx4 v[30:31], v[16:19] 3855; GCN-HSA-NEXT: s_sext_i32_i16 s51, s51 3856; GCN-HSA-NEXT: v_mov_b32_e32 v17, s3 3857; GCN-HSA-NEXT: v_mov_b32_e32 v16, s2 3858; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60 3859; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3860; GCN-HSA-NEXT: s_sext_i32_i16 s50, s50 3861; GCN-HSA-NEXT: v_mov_b32_e32 v19, s3 3862; 
GCN-HSA-NEXT: s_sext_i32_i16 s36, s36 3863; GCN-HSA-NEXT: s_sext_i32_i16 s39, s39 3864; GCN-HSA-NEXT: s_sext_i32_i16 s38, s38 3865; GCN-HSA-NEXT: s_sext_i32_i16 s41, s41 3866; GCN-HSA-NEXT: s_sext_i32_i16 s40, s40 3867; GCN-HSA-NEXT: s_sext_i32_i16 s45, s45 3868; GCN-HSA-NEXT: s_sext_i32_i16 s44, s44 3869; GCN-HSA-NEXT: s_sext_i32_i16 s47, s47 3870; GCN-HSA-NEXT: s_sext_i32_i16 s46, s46 3871; GCN-HSA-NEXT: v_mov_b32_e32 v0, s50 3872; GCN-HSA-NEXT: v_mov_b32_e32 v1, s68 3873; GCN-HSA-NEXT: v_mov_b32_e32 v2, s51 3874; GCN-HSA-NEXT: v_mov_b32_e32 v3, s67 3875; GCN-HSA-NEXT: v_mov_b32_e32 v18, s2 3876; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x50 3877; GCN-HSA-NEXT: s_sext_i32_i16 s29, s29 3878; GCN-HSA-NEXT: s_sext_i32_i16 s28, s28 3879; GCN-HSA-NEXT: s_sext_i32_i16 s31, s31 3880; GCN-HSA-NEXT: s_sext_i32_i16 s30, s30 3881; GCN-HSA-NEXT: s_sext_i32_i16 s37, s37 3882; GCN-HSA-NEXT: v_mov_b32_e32 v8, s46 3883; GCN-HSA-NEXT: v_mov_b32_e32 v9, s64 3884; GCN-HSA-NEXT: v_mov_b32_e32 v10, s47 3885; GCN-HSA-NEXT: v_mov_b32_e32 v11, s63 3886; GCN-HSA-NEXT: v_mov_b32_e32 v12, s44 3887; GCN-HSA-NEXT: v_mov_b32_e32 v13, s62 3888; GCN-HSA-NEXT: v_mov_b32_e32 v14, s45 3889; GCN-HSA-NEXT: v_mov_b32_e32 v15, s61 3890; GCN-HSA-NEXT: v_mov_b32_e32 v20, s40 3891; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[0:3] 3892; GCN-HSA-NEXT: v_mov_b32_e32 v21, s58 3893; GCN-HSA-NEXT: v_mov_b32_e32 v0, s38 3894; GCN-HSA-NEXT: v_mov_b32_e32 v22, s41 3895; GCN-HSA-NEXT: v_mov_b32_e32 v23, s57 3896; GCN-HSA-NEXT: v_mov_b32_e32 v1, s56 3897; GCN-HSA-NEXT: v_mov_b32_e32 v2, s39 3898; GCN-HSA-NEXT: v_mov_b32_e32 v4, s36 3899; GCN-HSA-NEXT: v_mov_b32_e32 v3, s55 3900; GCN-HSA-NEXT: v_mov_b32_e32 v5, s54 3901; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3902; GCN-HSA-NEXT: flat_store_dwordx4 v[26:27], v[8:11] 3903; GCN-HSA-NEXT: v_mov_b32_e32 v6, s37 3904; GCN-HSA-NEXT: v_mov_b32_e32 v8, s30 3905; GCN-HSA-NEXT: flat_store_dwordx4 v[28:29], v[12:15] 3906; GCN-HSA-NEXT: v_mov_b32_e32 v7, s53 3907; GCN-HSA-NEXT: 
v_mov_b32_e32 v12, s28 3908; GCN-HSA-NEXT: v_mov_b32_e32 v9, s52 3909; GCN-HSA-NEXT: v_mov_b32_e32 v10, s31 3910; GCN-HSA-NEXT: v_mov_b32_e32 v11, s35 3911; GCN-HSA-NEXT: v_mov_b32_e32 v13, s34 3912; GCN-HSA-NEXT: flat_store_dwordx4 v[32:33], v[20:23] 3913; GCN-HSA-NEXT: v_mov_b32_e32 v14, s29 3914; GCN-HSA-NEXT: v_mov_b32_e32 v15, s33 3915; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[0:3] 3916; GCN-HSA-NEXT: flat_store_dwordx4 v[34:35], v[4:7] 3917; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[8:11] 3918; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[12:15] 3919; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 3920; GCN-HSA-NEXT: s_sext_i32_i16 s27, s27 3921; GCN-HSA-NEXT: s_sext_i32_i16 s26, s26 3922; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 3923; GCN-HSA-NEXT: s_add_u32 s2, s0, 64 3924; GCN-HSA-NEXT: v_mov_b32_e32 v0, s26 3925; GCN-HSA-NEXT: v_mov_b32_e32 v1, s25 3926; GCN-HSA-NEXT: v_mov_b32_e32 v2, s27 3927; GCN-HSA-NEXT: v_mov_b32_e32 v3, s24 3928; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3929; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3930; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 3931; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 3932; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 3933; GCN-HSA-NEXT: v_mov_b32_e32 v0, s23 3934; GCN-HSA-NEXT: v_mov_b32_e32 v1, s21 3935; GCN-HSA-NEXT: v_mov_b32_e32 v2, s22 3936; GCN-HSA-NEXT: v_mov_b32_e32 v3, s20 3937; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3938; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3939; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 3940; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 3941; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 3942; GCN-HSA-NEXT: v_mov_b32_e32 v0, s19 3943; GCN-HSA-NEXT: v_mov_b32_e32 v1, s17 3944; GCN-HSA-NEXT: v_mov_b32_e32 v2, s18 3945; GCN-HSA-NEXT: v_mov_b32_e32 v3, s16 3946; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3947; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3948; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 3949; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 3950; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 3951; GCN-HSA-NEXT: v_mov_b32_e32 v0, s15 3952; GCN-HSA-NEXT: 
v_mov_b32_e32 v1, s13 3953; GCN-HSA-NEXT: v_mov_b32_e32 v2, s14 3954; GCN-HSA-NEXT: v_mov_b32_e32 v3, s12 3955; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3956; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3957; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 3958; GCN-HSA-NEXT: v_mov_b32_e32 v0, s11 3959; GCN-HSA-NEXT: v_mov_b32_e32 v1, s9 3960; GCN-HSA-NEXT: v_mov_b32_e32 v2, s10 3961; GCN-HSA-NEXT: v_mov_b32_e32 v3, s8 3962; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 3963; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3964; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 3965; GCN-HSA-NEXT: v_mov_b32_e32 v0, s7 3966; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 3967; GCN-HSA-NEXT: v_mov_b32_e32 v2, s6 3968; GCN-HSA-NEXT: v_mov_b32_e32 v3, s4 3969; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 3970; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3971; GCN-HSA-NEXT: s_endpgm 3972; 3973; GCN-NOHSA-VI-LABEL: constant_sextload_v64i16_to_v64i32: 3974; GCN-NOHSA-VI: ; %bb.0: 3975; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[36:39], s[0:1], 0x24 3976; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 3977; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[16:31], s[38:39], 0x0 3978; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[0:15], s[38:39], 0x40 3979; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 3980; GCN-NOHSA-VI-NEXT: s_ashr_i32 s49, s31, 16 3981; GCN-NOHSA-VI-NEXT: s_ashr_i32 s69, s15, 16 3982; GCN-NOHSA-VI-NEXT: s_ashr_i32 s70, s14, 16 3983; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s15, s15 3984; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s14, s14 3985; GCN-NOHSA-VI-NEXT: s_ashr_i32 s51, s1, 16 3986; GCN-NOHSA-VI-NEXT: s_ashr_i32 s52, s0, 16 3987; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s53, s1 3988; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s54, s0 3989; GCN-NOHSA-VI-NEXT: s_ashr_i32 s55, s3, 16 3990; GCN-NOHSA-VI-NEXT: s_ashr_i32 s56, s2, 16 3991; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s57, s3 3992; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s58, s2 3993; GCN-NOHSA-VI-NEXT: s_ashr_i32 s67, s13, 16 3994; GCN-NOHSA-VI-NEXT: s_ashr_i32 s68, s12, 16 3995; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s13, s13 3996; 
GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s12, s12 3997; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 3998; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 3999; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s36 4000; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s37 4001; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s14 4002; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s70 4003; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s15 4004; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s69 4005; GCN-NOHSA-VI-NEXT: s_ashr_i32 s65, s11, 16 4006; GCN-NOHSA-VI-NEXT: s_ashr_i32 s66, s10, 16 4007; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s11, s11 4008; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s10, s10 4009; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240 4010; GCN-NOHSA-VI-NEXT: s_ashr_i32 s63, s9, 16 4011; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s12 4012; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s68 4013; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s13 4014; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s67 4015; GCN-NOHSA-VI-NEXT: s_ashr_i32 s64, s8, 16 4016; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s9, s9 4017; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s8, s8 4018; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224 4019; GCN-NOHSA-VI-NEXT: s_ashr_i32 s61, s7, 16 4020; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10 4021; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s66 4022; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s11 4023; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s65 4024; GCN-NOHSA-VI-NEXT: s_ashr_i32 s62, s6, 16 4025; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s7, s7 4026; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s6, s6 4027; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208 4028; GCN-NOHSA-VI-NEXT: s_ashr_i32 s59, s5, 16 4029; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8 4030; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s64 4031; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s9 4032; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s63 4033; GCN-NOHSA-VI-NEXT: s_ashr_i32 s60, s4, 16 4034; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s5, s5 4035; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s4, s4 4036; GCN-NOHSA-VI-NEXT: 
buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192 4037; GCN-NOHSA-VI-NEXT: s_ashr_i32 s50, s30, 16 4038; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 4039; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s62 4040; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7 4041; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s61 4042; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176 4043; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s31, s31 4044; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 4045; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s60 4046; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 4047; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s59 4048; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160 4049; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s30, s30 4050; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s58 4051; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s56 4052; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s57 4053; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s55 4054; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144 4055; GCN-NOHSA-VI-NEXT: s_ashr_i32 s47, s29, 16 4056; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s54 4057; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s52 4058; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s53 4059; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s51 4060; GCN-NOHSA-VI-NEXT: s_ashr_i32 s48, s28, 16 4061; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s29, s29 4062; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s28, s28 4063; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128 4064; GCN-NOHSA-VI-NEXT: s_ashr_i32 s45, s27, 16 4065; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s30 4066; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s50 4067; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s31 4068; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s49 4069; GCN-NOHSA-VI-NEXT: s_ashr_i32 s46, s26, 16 4070; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s27, s27 4071; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s26, s26 4072; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 4073; GCN-NOHSA-VI-NEXT: s_ashr_i32 s43, s25, 16 4074; 
GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s28 4075; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s48 4076; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s29 4077; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s47 4078; GCN-NOHSA-VI-NEXT: s_ashr_i32 s44, s24, 16 4079; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s25, s25 4080; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s24, s24 4081; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 4082; GCN-NOHSA-VI-NEXT: s_ashr_i32 s41, s23, 16 4083; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s26 4084; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s46 4085; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s27 4086; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s45 4087; GCN-NOHSA-VI-NEXT: s_ashr_i32 s42, s22, 16 4088; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s23, s23 4089; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s22, s22 4090; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 4091; GCN-NOHSA-VI-NEXT: s_ashr_i32 s39, s21, 16 4092; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s24 4093; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s44 4094; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s25 4095; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s43 4096; GCN-NOHSA-VI-NEXT: s_ashr_i32 s40, s20, 16 4097; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s21, s21 4098; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s20, s20 4099; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 4100; GCN-NOHSA-VI-NEXT: s_ashr_i32 s35, s19, 16 4101; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s22 4102; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s42 4103; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s23 4104; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s41 4105; GCN-NOHSA-VI-NEXT: s_ashr_i32 s38, s18, 16 4106; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s19, s19 4107; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s18, s18 4108; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 4109; GCN-NOHSA-VI-NEXT: s_ashr_i32 s33, s17, 16 4110; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s20 4111; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s40 4112; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s21 4113; 
GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s39 4114; GCN-NOHSA-VI-NEXT: s_ashr_i32 s34, s16, 16 4115; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s17, s17 4116; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s16, s16 4117; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 4118; GCN-NOHSA-VI-NEXT: s_nop 0 4119; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s18 4120; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s38 4121; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s19 4122; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s35 4123; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 4124; GCN-NOHSA-VI-NEXT: s_nop 0 4125; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s16 4126; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s34 4127; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s17 4128; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s33 4129; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 4130; GCN-NOHSA-VI-NEXT: s_endpgm 4131; 4132; EG-LABEL: constant_sextload_v64i16_to_v64i32: 4133; EG: ; %bb.0: 4134; EG-NEXT: ALU 17, @38, KC0[CB0:0-32], KC1[] 4135; EG-NEXT: TEX 7 @22 4136; EG-NEXT: ALU 75, @56, KC0[CB0:0-32], KC1[] 4137; EG-NEXT: ALU 71, @132, KC0[CB0:0-32], KC1[] 4138; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T66.XYZW, T48.X, 0 4139; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T41.X, 0 4140; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T65.XYZW, T56.X, 0 4141; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T55.X, 0 4142; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T64.XYZW, T54.X, 0 4143; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T53.X, 0 4144; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T63.XYZW, T52.X, 0 4145; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T51.X, 0 4146; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T62.XYZW, T50.X, 0 4147; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T49.X, 0 4148; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T61.XYZW, T40.X, 0 4149; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T39.X, 0 4150; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T60.XYZW, T38.X, 0 4151; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T59.XYZW, 
T37.X, 0 4152; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T58.XYZW, T36.X, 0 4153; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T57.XYZW, T35.X, 1 4154; EG-NEXT: CF_END 4155; EG-NEXT: PAD 4156; EG-NEXT: Fetch clause starting at 22: 4157; EG-NEXT: VTX_READ_128 T42.XYZW, T41.X, 16, #1 4158; EG-NEXT: VTX_READ_128 T43.XYZW, T41.X, 32, #1 4159; EG-NEXT: VTX_READ_128 T44.XYZW, T41.X, 0, #1 4160; EG-NEXT: VTX_READ_128 T45.XYZW, T41.X, 48, #1 4161; EG-NEXT: VTX_READ_128 T46.XYZW, T41.X, 64, #1 4162; EG-NEXT: VTX_READ_128 T47.XYZW, T41.X, 80, #1 4163; EG-NEXT: VTX_READ_128 T48.XYZW, T41.X, 96, #1 4164; EG-NEXT: VTX_READ_128 T41.XYZW, T41.X, 112, #1 4165; EG-NEXT: ALU clause starting at 38: 4166; EG-NEXT: LSHR T35.X, KC0[2].Y, literal.x, 4167; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4168; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4169; EG-NEXT: LSHR T36.X, PV.W, literal.x, 4170; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4171; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 4172; EG-NEXT: LSHR T37.X, PV.W, literal.x, 4173; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4174; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 4175; EG-NEXT: LSHR T38.X, PV.W, literal.x, 4176; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4177; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 4178; EG-NEXT: LSHR T39.X, PV.W, literal.x, 4179; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4180; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 4181; EG-NEXT: LSHR T40.X, PV.W, literal.x, 4182; EG-NEXT: MOV * T41.X, KC0[2].Z, 4183; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4184; EG-NEXT: ALU clause starting at 56: 4185; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 4186; EG-NEXT: 96(1.345247e-43), 0(0.000000e+00) 4187; EG-NEXT: LSHR T49.X, PV.W, literal.x, 4188; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4189; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) 4190; EG-NEXT: LSHR T50.X, PV.W, literal.x, 4191; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4192; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43) 4193; EG-NEXT: LSHR T51.X, 
PV.W, literal.x, 4194; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4195; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43) 4196; EG-NEXT: LSHR T52.X, PV.W, literal.x, 4197; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4198; EG-NEXT: 2(2.802597e-45), 160(2.242078e-43) 4199; EG-NEXT: LSHR T53.X, PV.W, literal.x, 4200; EG-NEXT: LSHR T0.Y, T41.W, literal.y, 4201; EG-NEXT: LSHR T0.Z, T41.Y, literal.y, 4202; EG-NEXT: LSHR T0.W, T48.W, literal.y, BS:VEC_120/SCL_212 4203; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 4204; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4205; EG-NEXT: 176(2.466285e-43), 0(0.000000e+00) 4206; EG-NEXT: LSHR T54.X, PS, literal.x, 4207; EG-NEXT: LSHR T1.Y, T48.Y, literal.y, 4208; EG-NEXT: LSHR T1.Z, T47.W, literal.y, 4209; EG-NEXT: LSHR T1.W, T47.Y, literal.y, BS:VEC_120/SCL_212 4210; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.z, 4211; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4212; EG-NEXT: 192(2.690493e-43), 0(0.000000e+00) 4213; EG-NEXT: LSHR T55.X, PS, literal.x, 4214; EG-NEXT: LSHR T2.Y, T46.W, literal.y, 4215; EG-NEXT: LSHR T2.Z, T46.Y, literal.y, 4216; EG-NEXT: LSHR T2.W, T45.W, literal.y, BS:VEC_120/SCL_212 4217; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.z, 4218; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4219; EG-NEXT: 208(2.914701e-43), 0(0.000000e+00) 4220; EG-NEXT: LSHR T56.X, PS, literal.x, 4221; EG-NEXT: LSHR T3.Y, T45.Y, literal.y, 4222; EG-NEXT: BFE_INT T57.Z, T44.Y, 0.0, literal.y, BS:VEC_120/SCL_212 4223; EG-NEXT: LSHR T3.W, T43.W, literal.y, 4224; EG-NEXT: LSHR * T4.W, T43.Y, literal.y, 4225; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4226; EG-NEXT: BFE_INT T57.X, T44.X, 0.0, literal.x, 4227; EG-NEXT: LSHR T4.Y, T42.W, literal.x, 4228; EG-NEXT: BFE_INT T58.Z, T44.W, 0.0, literal.x, BS:VEC_120/SCL_212 4229; EG-NEXT: LSHR T5.W, T42.Y, literal.x, 4230; EG-NEXT: LSHR * T6.W, T44.Y, literal.x, 4231; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4232; EG-NEXT: BFE_INT T58.X, T44.Z, 0.0, literal.x, 4233; EG-NEXT: LSHR T5.Y, T44.W, 
literal.x, 4234; EG-NEXT: BFE_INT T59.Z, T42.Y, 0.0, literal.x, 4235; EG-NEXT: BFE_INT T57.W, PS, 0.0, literal.x, 4236; EG-NEXT: LSHR * T6.W, T44.X, literal.x, 4237; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4238; EG-NEXT: BFE_INT T59.X, T42.X, 0.0, literal.x, 4239; EG-NEXT: BFE_INT T57.Y, PS, 0.0, literal.x, 4240; EG-NEXT: BFE_INT T60.Z, T42.W, 0.0, literal.x, 4241; EG-NEXT: BFE_INT T58.W, PV.Y, 0.0, literal.x, 4242; EG-NEXT: LSHR * T6.W, T44.Z, literal.x, 4243; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4244; EG-NEXT: BFE_INT T60.X, T42.Z, 0.0, literal.x, 4245; EG-NEXT: BFE_INT T58.Y, PS, 0.0, literal.x, 4246; EG-NEXT: BFE_INT T44.Z, T43.Y, 0.0, literal.x, 4247; EG-NEXT: BFE_INT T59.W, T5.W, 0.0, literal.x, 4248; EG-NEXT: LSHR * T5.W, T42.X, literal.x, 4249; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4250; EG-NEXT: BFE_INT T44.X, T43.X, 0.0, literal.x, 4251; EG-NEXT: BFE_INT T59.Y, PS, 0.0, literal.x, 4252; EG-NEXT: BFE_INT T61.Z, T43.W, 0.0, literal.x, 4253; EG-NEXT: BFE_INT T60.W, T4.Y, 0.0, literal.x, 4254; EG-NEXT: LSHR * T5.W, T42.Z, literal.x, 4255; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4256; EG-NEXT: BFE_INT T61.X, T43.Z, 0.0, literal.x, 4257; EG-NEXT: BFE_INT T60.Y, PS, 0.0, literal.x, 4258; EG-NEXT: BFE_INT T42.Z, T45.Y, 0.0, literal.x, 4259; EG-NEXT: BFE_INT * T44.W, T4.W, 0.0, literal.x, 4260; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4261; EG-NEXT: ALU clause starting at 132: 4262; EG-NEXT: LSHR * T4.W, T43.X, literal.x, 4263; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4264; EG-NEXT: BFE_INT T42.X, T45.X, 0.0, literal.x, 4265; EG-NEXT: BFE_INT T44.Y, PV.W, 0.0, literal.x, 4266; EG-NEXT: BFE_INT T62.Z, T45.W, 0.0, literal.x, 4267; EG-NEXT: BFE_INT T61.W, T3.W, 0.0, literal.x, BS:VEC_120/SCL_212 4268; EG-NEXT: LSHR * T3.W, T43.Z, literal.x, 4269; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4270; EG-NEXT: BFE_INT T62.X, T45.Z, 0.0, literal.x, 4271; EG-NEXT: BFE_INT T61.Y, PS, 0.0, literal.x, 4272; EG-NEXT: BFE_INT T43.Z, T46.Y, 0.0, literal.x, 
4273; EG-NEXT: BFE_INT T42.W, T3.Y, 0.0, literal.x, BS:VEC_120/SCL_212 4274; EG-NEXT: LSHR * T3.W, T45.X, literal.x, 4275; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4276; EG-NEXT: BFE_INT T43.X, T46.X, 0.0, literal.x, 4277; EG-NEXT: BFE_INT T42.Y, PS, 0.0, literal.x, 4278; EG-NEXT: BFE_INT T63.Z, T46.W, 0.0, literal.x, 4279; EG-NEXT: BFE_INT T62.W, T2.W, 0.0, literal.x, BS:VEC_120/SCL_212 4280; EG-NEXT: LSHR * T2.W, T45.Z, literal.x, 4281; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4282; EG-NEXT: BFE_INT T63.X, T46.Z, 0.0, literal.x, 4283; EG-NEXT: BFE_INT T62.Y, PS, 0.0, literal.x, 4284; EG-NEXT: BFE_INT T45.Z, T47.Y, 0.0, literal.x, 4285; EG-NEXT: BFE_INT T43.W, T2.Z, 0.0, literal.x, BS:VEC_120/SCL_212 4286; EG-NEXT: LSHR * T2.W, T46.X, literal.x, 4287; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4288; EG-NEXT: BFE_INT T45.X, T47.X, 0.0, literal.x, 4289; EG-NEXT: BFE_INT T43.Y, PS, 0.0, literal.x, 4290; EG-NEXT: BFE_INT T64.Z, T47.W, 0.0, literal.x, 4291; EG-NEXT: BFE_INT T63.W, T2.Y, 0.0, literal.x, 4292; EG-NEXT: LSHR * T2.W, T46.Z, literal.x, 4293; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4294; EG-NEXT: BFE_INT T64.X, T47.Z, 0.0, literal.x, 4295; EG-NEXT: BFE_INT T63.Y, PS, 0.0, literal.x, 4296; EG-NEXT: BFE_INT T46.Z, T48.Y, 0.0, literal.x, 4297; EG-NEXT: BFE_INT T45.W, T1.W, 0.0, literal.x, 4298; EG-NEXT: LSHR * T1.W, T47.X, literal.x, 4299; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4300; EG-NEXT: BFE_INT T46.X, T48.X, 0.0, literal.x, 4301; EG-NEXT: BFE_INT T45.Y, PS, 0.0, literal.x, 4302; EG-NEXT: BFE_INT T65.Z, T48.W, 0.0, literal.x, 4303; EG-NEXT: BFE_INT T64.W, T1.Z, 0.0, literal.x, 4304; EG-NEXT: LSHR * T1.W, T47.Z, literal.x, 4305; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4306; EG-NEXT: BFE_INT T65.X, T48.Z, 0.0, literal.x, 4307; EG-NEXT: BFE_INT T64.Y, PS, 0.0, literal.x, 4308; EG-NEXT: BFE_INT T47.Z, T41.Y, 0.0, literal.x, 4309; EG-NEXT: BFE_INT T46.W, T1.Y, 0.0, literal.x, BS:VEC_120/SCL_212 4310; EG-NEXT: LSHR * T1.W, T48.X, literal.x, 4311; 
EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: BFE_INT T47.X, T41.X, 0.0, literal.x,
; EG-NEXT: BFE_INT T46.Y, PS, 0.0, literal.x,
; EG-NEXT: BFE_INT T66.Z, T41.W, 0.0, literal.x,
; EG-NEXT: BFE_INT T65.W, T0.W, 0.0, literal.x, BS:VEC_120/SCL_212
; EG-NEXT: LSHR * T0.W, T48.Z, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: BFE_INT T66.X, T41.Z, 0.0, literal.x,
; EG-NEXT: BFE_INT T65.Y, PS, 0.0, literal.x,
; EG-NEXT: LSHR T1.Z, T41.X, literal.x,
; EG-NEXT: BFE_INT T47.W, T0.Z, 0.0, literal.x, BS:VEC_120/SCL_212
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y,
; EG-NEXT: 16(2.242078e-44), 224(3.138909e-43)
; EG-NEXT: LSHR T41.X, PS, literal.x,
; EG-NEXT: BFE_INT T47.Y, PV.Z, 0.0, literal.y,
; EG-NEXT: LSHR T0.Z, T41.Z, literal.y,
; EG-NEXT: BFE_INT T66.W, T0.Y, 0.0, literal.y,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z,
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
; EG-NEXT: 240(3.363116e-43), 0(0.000000e+00)
; EG-NEXT: LSHR T48.X, PS, literal.x,
; EG-NEXT: BFE_INT * T66.Y, PV.Z, 0.0, literal.y,
; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44)
  %load = load <64 x i16>, <64 x i16> addrspace(4)* %in
  %ext = sext <64 x i16> %load to <64 x i32>
  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
  ret void
}

; Scalar zero-extending load, i16 -> i64.
; The kernel reads one i16 through the addrspace(4) pointer %in, zero-extends
; it to i64 and stores the result through the addrspace(1) pointer %out.
; What the checks below pin down:
;   - every run line uses a 16-bit unsigned load (buffer_load_ushort,
;     flat_load_ushort or VTX_READ_16);
;   - the second 32-bit element of the stored pair is written as a constant
;     zero (v_mov_b32_e32 v1, 0 on the GCN runs, MOV T0.Y, 0.0 on the r600 run);
;   - only the VI run additionally masks the loaded value with 0xffff before
;     the store.
; The check lines are produced by utils/update_llc_test_checks.py (see the
; note at the top of the file); regenerate them rather than editing by hand.
define amdgpu_kernel void @constant_zextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
; GCN-NOHSA-SI-LABEL: constant_zextload_i16_to_i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GCN-NOHSA-SI-NEXT: s_endpgm
;
; GCN-HSA-LABEL: constant_zextload_i16_to_i64:
; GCN-HSA: ; %bb.0:
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
; GCN-HSA-NEXT: flat_load_ushort v0, v[0:1]
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1
; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0
; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
; GCN-HSA-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GCN-HSA-NEXT: s_endpgm
;
; GCN-NOHSA-VI-LABEL: constant_zextload_i16_to_i64:
; GCN-NOHSA-VI: ; %bb.0:
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6
; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2
; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GCN-NOHSA-VI-NEXT: s_endpgm
;
; EG-LABEL: constant_zextload_i16_to_i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: MOV * T0.Y, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  %a = load i16, i16 addrspace(4)* %in
  %ext = zext i16 %a to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; Scalar sign-extending load, i16 -> i64.
; The kernel reads one i16 through the addrspace(4) pointer %in, sign-extends
; it to i64 and stores the result through the addrspace(1) pointer %out.
; What the checks below pin down:
;   - the SI and HSA runs use a sign-extending 16-bit load (buffer_load_sshort,
;     flat_load_sshort) and derive the second dword with an arithmetic shift
;     right by 31;
;   - the VI and r600 runs load the value unsigned (buffer_load_ushort,
;     VTX_READ_16) and sign-extend afterwards with a 16-bit bit-field extract
;     (v_bfe_i32, BFE_INT) before the shift by 31 -- presumably the "extra bfe"
;     the FIXME below refers to.
;
; FIXME: Need to optimize this sequence to avoid extra bfe:
;  t28: i32,ch = load<LD2[%in(addrspace=1)], anyext from i16> t12, t27, undef:i64
;  t31: i64 = any_extend t28
;  t33: i64 = sign_extend_inreg t31, ValueType:ch:i16
; TODO: These could be expanded earlier using ASHR 15
; NOTE(review): the DAG dump above says addrspace=1, while %in in this kernel
; is an addrspace(4) pointer -- presumably the note was carried over from a
; different test; confirm before relying on it.
define amdgpu_kernel void @constant_sextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(4)* %in) #0 {
; GCN-NOHSA-SI-LABEL: constant_sextload_i16_to_i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
; GCN-NOHSA-SI-NEXT: buffer_load_sshort v0, off, s[8:11], 0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GCN-NOHSA-SI-NEXT: s_endpgm
;
; GCN-HSA-LABEL: constant_sextload_i16_to_i64:
; GCN-HSA: ; %bb.0:
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
; GCN-HSA-NEXT: flat_load_sshort v0, v[0:1]
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1
; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GCN-HSA-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GCN-HSA-NEXT: s_endpgm
;
; GCN-NOHSA-VI-LABEL: constant_sextload_i16_to_i64:
; GCN-NOHSA-VI: ; %bb.0:
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6
; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2
; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v0, 0, 16
; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GCN-NOHSA-VI-NEXT: s_endpgm
;
; EG-LABEL: constant_sextload_i16_to_i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45)
; EG-NEXT: ASHR * T0.Y, PV.X, literal.x,
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
  %a = load i16, i16 addrspace(4)* %in
  %ext = sext i16 %a to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; Single-element vector form of the zero-extending load, <1 x i16> -> <1 x i64>.
; The kernel reads a <1 x i16> through the addrspace(4) pointer %in,
; zero-extends it to <1 x i64> and stores it through the addrspace(1) pointer
; %out. The instruction sequences checked below are the same as those checked
; for the scalar @constant_zextload_i16_to_i64 kernel on all four run lines:
; a 16-bit unsigned load, a constant zero for the second dword, and the
; additional 0xffff mask on the VI run only.
define amdgpu_kernel void @constant_zextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 {
; GCN-NOHSA-SI-LABEL: constant_zextload_v1i16_to_v1i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GCN-NOHSA-SI-NEXT: s_endpgm
;
; GCN-HSA-LABEL: constant_zextload_v1i16_to_v1i64:
; GCN-HSA: ; %bb.0:
; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2
; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3
; GCN-HSA-NEXT: flat_load_ushort v0, v[0:1]
; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0
; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1
; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0
; GCN-HSA-NEXT: s_waitcnt vmcnt(0)
; GCN-HSA-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GCN-HSA-NEXT: s_endpgm
;
; GCN-NOHSA-VI-LABEL: constant_zextload_v1i16_to_v1i64:
; GCN-NOHSA-VI: ; %bb.0:
; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000
; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1
; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6
; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7
; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2
; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3
; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0
; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0
; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1
; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0
; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0)
; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GCN-NOHSA-VI-NEXT: s_endpgm
;
; EG-LABEL: constant_zextload_v1i16_to_v1i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: MOV * T0.Y, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  %load = load <1 x i16>, <1 x i16> addrspace(4)* %in
  %ext = zext <1 x i16> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @constant_sextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 {
; GCN-NOHSA-SI-LABEL: constant_sextload_v1i16_to_v1i64:
; GCN-NOHSA-SI: ; %bb.0:
; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000
; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1
; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6
; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7
; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2
; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3
; GCN-NOHSA-SI-NEXT: buffer_load_sshort v0, off, s[8:11], 0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0
; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1
; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0)
; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], 
off, s[4:7], 0 4586; GCN-NOHSA-SI-NEXT: s_endpgm 4587; 4588; GCN-HSA-LABEL: constant_sextload_v1i16_to_v1i64: 4589; GCN-HSA: ; %bb.0: 4590; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4591; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 4592; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 4593; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 4594; GCN-HSA-NEXT: flat_load_sshort v0, v[0:1] 4595; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0 4596; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1 4597; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 4598; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 4599; GCN-HSA-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 4600; GCN-HSA-NEXT: s_endpgm 4601; 4602; GCN-NOHSA-VI-LABEL: constant_sextload_v1i16_to_v1i64: 4603; GCN-NOHSA-VI: ; %bb.0: 4604; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 4605; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 4606; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 4607; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 4608; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 4609; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4610; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 4611; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 4612; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 4613; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 4614; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 4615; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 4616; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v0, 0, 16 4617; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 4618; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 4619; GCN-NOHSA-VI-NEXT: s_endpgm 4620; 4621; EG-LABEL: constant_sextload_v1i16_to_v1i64: 4622; EG: ; %bb.0: 4623; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 4624; EG-NEXT: TEX 0 @6 4625; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[] 4626; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 4627; EG-NEXT: CF_END 4628; EG-NEXT: PAD 4629; EG-NEXT: Fetch clause starting at 6: 4630; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 4631; EG-NEXT: ALU clause starting at 8: 4632; EG-NEXT: MOV * T0.X, KC0[2].Z, 4633; EG-NEXT: ALU clause starting at 9: 4634; EG-NEXT: BFE_INT T0.X, 
T0.X, 0.0, literal.x, 4635; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 4636; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 4637; EG-NEXT: ASHR * T0.Y, PV.X, literal.x, 4638; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 4639 %load = load <1 x i16>, <1 x i16> addrspace(4)* %in 4640 %ext = sext <1 x i16> %load to <1 x i64> 4641 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out 4642 ret void 4643} 4644 4645define amdgpu_kernel void @constant_zextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 { 4646; GCN-NOHSA-SI-LABEL: constant_zextload_v2i16_to_v2i64: 4647; GCN-NOHSA-SI: ; %bb.0: 4648; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 4649; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 4650; GCN-NOHSA-SI-NEXT: s_load_dword s2, s[2:3], 0x0 4651; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 4652; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0 4653; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 4654; GCN-NOHSA-SI-NEXT: s_lshr_b32 s4, s2, 16 4655; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s2, 0xffff 4656; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 4657; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v1 4658; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5 4659; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s4 4660; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 4661; GCN-NOHSA-SI-NEXT: s_endpgm 4662; 4663; GCN-HSA-LABEL: constant_zextload_v2i16_to_v2i64: 4664; GCN-HSA: ; %bb.0: 4665; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4666; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 4667; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1 4668; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 4669; GCN-HSA-NEXT: s_load_dword s2, s[2:3], 0x0 4670; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 4671; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 4672; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 4673; GCN-HSA-NEXT: s_lshr_b32 s0, s2, 16 4674; GCN-HSA-NEXT: s_and_b32 s1, s2, 0xffff 4675; GCN-HSA-NEXT: v_mov_b32_e32 v0, s1 4676; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0 4677; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4678; GCN-HSA-NEXT: s_endpgm 4679; 4680; 
GCN-NOHSA-VI-LABEL: constant_zextload_v2i16_to_v2i64: 4681; GCN-NOHSA-VI: ; %bb.0: 4682; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 4683; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0 4684; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 4685; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 4686; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, v1 4687; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4688; GCN-NOHSA-VI-NEXT: s_load_dword s2, s[2:3], 0x0 4689; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 4690; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 4691; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4692; GCN-NOHSA-VI-NEXT: s_lshr_b32 s0, s2, 16 4693; GCN-NOHSA-VI-NEXT: s_and_b32 s1, s2, 0xffff 4694; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s1 4695; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s0 4696; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 4697; GCN-NOHSA-VI-NEXT: s_endpgm 4698; 4699; EG-LABEL: constant_zextload_v2i16_to_v2i64: 4700; EG: ; %bb.0: 4701; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 4702; EG-NEXT: TEX 0 @6 4703; EG-NEXT: ALU 6, @9, KC0[CB0:0-32], KC1[] 4704; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T5.X, 1 4705; EG-NEXT: CF_END 4706; EG-NEXT: PAD 4707; EG-NEXT: Fetch clause starting at 6: 4708; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 4709; EG-NEXT: ALU clause starting at 8: 4710; EG-NEXT: MOV * T4.X, KC0[2].Z, 4711; EG-NEXT: ALU clause starting at 9: 4712; EG-NEXT: LSHR * T4.Z, T4.X, literal.x, 4713; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4714; EG-NEXT: AND_INT T4.X, T4.X, literal.x, 4715; EG-NEXT: MOV T4.Y, 0.0, 4716; EG-NEXT: MOV T4.W, 0.0, 4717; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y, 4718; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 4719 %load = load <2 x i16>, <2 x i16> addrspace(4)* %in 4720 %ext = zext <2 x i16> %load to <2 x i64> 4721 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out 4722 ret void 4723} 4724 4725define amdgpu_kernel void @constant_sextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 { 4726; GCN-NOHSA-SI-LABEL: 
constant_sextload_v2i16_to_v2i64: 4727; GCN-NOHSA-SI: ; %bb.0: 4728; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 4729; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 4730; GCN-NOHSA-SI-NEXT: s_load_dword s2, s[2:3], 0x0 4731; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 4732; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 4733; GCN-NOHSA-SI-NEXT: s_lshr_b32 s4, s2, 16 4734; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[6:7], s[2:3], 0x100000 4735; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000 4736; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 4737; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 4738; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s7 4739; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s4 4740; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s5 4741; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 4742; GCN-NOHSA-SI-NEXT: s_endpgm 4743; 4744; GCN-HSA-LABEL: constant_sextload_v2i16_to_v2i64: 4745; GCN-HSA: ; %bb.0: 4746; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4747; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 4748; GCN-HSA-NEXT: s_load_dword s2, s[2:3], 0x0 4749; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 4750; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 4751; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 4752; GCN-HSA-NEXT: s_lshr_b32 s0, s2, 16 4753; GCN-HSA-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x100000 4754; GCN-HSA-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x100000 4755; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 4756; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 4757; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0 4758; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1 4759; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4760; GCN-HSA-NEXT: s_endpgm 4761; 4762; GCN-NOHSA-VI-LABEL: constant_sextload_v2i16_to_v2i64: 4763; GCN-NOHSA-VI: ; %bb.0: 4764; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 4765; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4766; GCN-NOHSA-VI-NEXT: s_load_dword s4, s[2:3], 0x0 4767; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 4768; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 4769; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4770; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[6:7], 
s[4:5], 0x100000 4771; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s4, 16 4772; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000 4773; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 4774; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s7 4775; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4 4776; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s5 4777; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 4778; GCN-NOHSA-VI-NEXT: s_endpgm 4779; 4780; EG-LABEL: constant_sextload_v2i16_to_v2i64: 4781; EG: ; %bb.0: 4782; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 4783; EG-NEXT: TEX 0 @6 4784; EG-NEXT: ALU 8, @9, KC0[CB0:0-32], KC1[] 4785; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T5.X, 1 4786; EG-NEXT: CF_END 4787; EG-NEXT: PAD 4788; EG-NEXT: Fetch clause starting at 6: 4789; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 4790; EG-NEXT: ALU clause starting at 8: 4791; EG-NEXT: MOV * T4.X, KC0[2].Z, 4792; EG-NEXT: ALU clause starting at 9: 4793; EG-NEXT: ASHR * T4.W, T4.X, literal.x, 4794; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 4795; EG-NEXT: ASHR * T4.Z, T4.X, literal.x, 4796; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4797; EG-NEXT: BFE_INT T4.X, T4.X, 0.0, literal.x, 4798; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y, 4799; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 4800; EG-NEXT: ASHR * T4.Y, PV.X, literal.x, 4801; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 4802 %load = load <2 x i16>, <2 x i16> addrspace(4)* %in 4803 %ext = sext <2 x i16> %load to <2 x i64> 4804 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out 4805 ret void 4806} 4807 4808define amdgpu_kernel void @constant_zextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 { 4809; GCN-NOHSA-SI-LABEL: constant_zextload_v4i16_to_v4i64: 4810; GCN-NOHSA-SI: ; %bb.0: 4811; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 4812; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 4813; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 4814; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 4815; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 
0 4816; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 4817; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v1 4818; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 4819; GCN-NOHSA-SI-NEXT: s_lshr_b32 s6, s5, 16 4820; GCN-NOHSA-SI-NEXT: s_lshr_b32 s7, s4, 16 4821; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, 0xffff 4822; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, 0xffff 4823; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5 4824; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s6 4825; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 4826; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 4827; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 4828; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7 4829; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 4830; GCN-NOHSA-SI-NEXT: s_endpgm 4831; 4832; GCN-HSA-LABEL: constant_zextload_v4i16_to_v4i64: 4833; GCN-HSA: ; %bb.0: 4834; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4835; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 4836; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1 4837; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 4838; GCN-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 4839; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 4840; GCN-HSA-NEXT: s_lshr_b32 s4, s3, 16 4841; GCN-HSA-NEXT: s_lshr_b32 s5, s2, 16 4842; GCN-HSA-NEXT: s_and_b32 s6, s2, 0xffff 4843; GCN-HSA-NEXT: s_and_b32 s2, s3, 0xffff 4844; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 4845; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 4846; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4847; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 4848; GCN-HSA-NEXT: v_mov_b32_e32 v2, s4 4849; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 4850; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4851; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 4852; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 4853; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5 4854; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 4855; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4856; GCN-HSA-NEXT: s_endpgm 4857; 4858; GCN-NOHSA-VI-LABEL: constant_zextload_v4i16_to_v4i64: 4859; GCN-NOHSA-VI: ; %bb.0: 4860; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 4861; 
GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0 4862; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 4863; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 4864; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, v1 4865; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4866; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 4867; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 4868; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 4869; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4870; GCN-NOHSA-VI-NEXT: s_lshr_b32 s0, s2, 16 4871; GCN-NOHSA-VI-NEXT: s_and_b32 s1, s2, 0xffff 4872; GCN-NOHSA-VI-NEXT: s_lshr_b32 s2, s3, 16 4873; GCN-NOHSA-VI-NEXT: s_and_b32 s3, s3, 0xffff 4874; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s3 4875; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s2 4876; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16 4877; GCN-NOHSA-VI-NEXT: s_nop 0 4878; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s1 4879; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s0 4880; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 4881; GCN-NOHSA-VI-NEXT: s_endpgm 4882; 4883; EG-LABEL: constant_zextload_v4i16_to_v4i64: 4884; EG: ; %bb.0: 4885; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 4886; EG-NEXT: TEX 0 @6 4887; EG-NEXT: ALU 18, @9, KC0[CB0:0-32], KC1[] 4888; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T8.X, 0 4889; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T7.X, 1 4890; EG-NEXT: CF_END 4891; EG-NEXT: Fetch clause starting at 6: 4892; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 4893; EG-NEXT: ALU clause starting at 8: 4894; EG-NEXT: MOV * T5.X, KC0[2].Z, 4895; EG-NEXT: ALU clause starting at 9: 4896; EG-NEXT: MOV T2.X, T5.X, 4897; EG-NEXT: MOV * T3.X, T5.Y, 4898; EG-NEXT: MOV T0.Y, PV.X, 4899; EG-NEXT: MOV * T0.Z, PS, 4900; EG-NEXT: LSHR * T5.Z, PV.Z, literal.x, 4901; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4902; EG-NEXT: AND_INT T5.X, T0.Z, literal.x, 4903; EG-NEXT: MOV T5.Y, 0.0, 4904; EG-NEXT: LSHR T6.Z, T0.Y, literal.y, 4905; EG-NEXT: AND_INT * T6.X, T0.Y, literal.x, 4906; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 4907; 
EG-NEXT: MOV T6.Y, 0.0, 4908; EG-NEXT: MOV T5.W, 0.0, 4909; EG-NEXT: MOV * T6.W, 0.0, 4910; EG-NEXT: LSHR T7.X, KC0[2].Y, literal.x, 4911; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4912; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4913; EG-NEXT: LSHR * T8.X, PV.W, literal.x, 4914; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4915 %load = load <4 x i16>, <4 x i16> addrspace(4)* %in 4916 %ext = zext <4 x i16> %load to <4 x i64> 4917 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out 4918 ret void 4919} 4920 4921define amdgpu_kernel void @constant_sextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 { 4922; GCN-NOHSA-SI-LABEL: constant_sextload_v4i16_to_v4i64: 4923; GCN-NOHSA-SI: ; %bb.0: 4924; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 4925; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 4926; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 4927; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 4928; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 4929; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 4930; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, s5 4931; GCN-NOHSA-SI-NEXT: s_lshr_b32 s8, s4, 16 4932; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[10:11], s[4:5], 0x100000 4933; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[4:5], s[4:5], 48 4934; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000 4935; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x100000 4936; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 4937; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s7 4938; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s4 4939; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s5 4940; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 4941; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 4942; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10 4943; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s11 4944; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s8 4945; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s9 4946; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 4947; GCN-NOHSA-SI-NEXT: s_endpgm 4948; 4949; GCN-HSA-LABEL: 
constant_sextload_v4i16_to_v4i64: 4950; GCN-HSA: ; %bb.0: 4951; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4952; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 4953; GCN-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 4954; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 4955; GCN-HSA-NEXT: s_mov_b32 s4, s3 4956; GCN-HSA-NEXT: s_lshr_b32 s6, s2, 16 4957; GCN-HSA-NEXT: s_bfe_i64 s[8:9], s[2:3], 0x100000 4958; GCN-HSA-NEXT: s_ashr_i64 s[2:3], s[2:3], 48 4959; GCN-HSA-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000 4960; GCN-HSA-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000 4961; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 4962; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 4963; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 4964; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4965; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 4966; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 4967; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 4968; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 4969; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4970; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 4971; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 4972; GCN-HSA-NEXT: v_mov_b32_e32 v1, s9 4973; GCN-HSA-NEXT: v_mov_b32_e32 v2, s6 4974; GCN-HSA-NEXT: v_mov_b32_e32 v3, s7 4975; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 4976; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4977; GCN-HSA-NEXT: s_endpgm 4978; 4979; GCN-NOHSA-VI-LABEL: constant_sextload_v4i16_to_v4i64: 4980; GCN-NOHSA-VI: ; %bb.0: 4981; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 4982; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4983; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 4984; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 4985; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 4986; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4987; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s5 4988; GCN-NOHSA-VI-NEXT: s_lshr_b32 s10, s5, 16 4989; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[6:7], s[4:5], 0x100000 4990; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s4, 16 4991; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x100000 4992; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x100000 4993; GCN-NOHSA-VI-NEXT: 
s_bfe_i64 s[4:5], s[4:5], 0x100000 4994; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8 4995; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s9 4996; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s10 4997; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s11 4998; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 4999; GCN-NOHSA-VI-NEXT: s_nop 0 5000; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 5001; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s7 5002; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4 5003; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s5 5004; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 5005; GCN-NOHSA-VI-NEXT: s_endpgm 5006; 5007; EG-LABEL: constant_sextload_v4i16_to_v4i64: 5008; EG: ; %bb.0: 5009; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5010; EG-NEXT: TEX 0 @6 5011; EG-NEXT: ALU 20, @9, KC0[CB0:0-32], KC1[] 5012; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T8.X, 0 5013; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1 5014; EG-NEXT: CF_END 5015; EG-NEXT: Fetch clause starting at 6: 5016; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 5017; EG-NEXT: ALU clause starting at 8: 5018; EG-NEXT: MOV * T5.X, KC0[2].Z, 5019; EG-NEXT: ALU clause starting at 9: 5020; EG-NEXT: MOV T2.X, T5.X, 5021; EG-NEXT: MOV * T3.X, T5.Y, 5022; EG-NEXT: MOV T0.Y, PS, 5023; EG-NEXT: MOV * T0.Z, PV.X, 5024; EG-NEXT: ASHR * T5.W, PV.Z, literal.x, 5025; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 5026; EG-NEXT: LSHR T6.X, KC0[2].Y, literal.x, 5027; EG-NEXT: ASHR T5.Z, T0.Z, literal.y, 5028; EG-NEXT: ASHR * T7.W, T0.Y, literal.z, 5029; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5030; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 5031; EG-NEXT: BFE_INT T5.X, T0.Z, 0.0, literal.x, 5032; EG-NEXT: ASHR * T7.Z, T0.Y, literal.x, 5033; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5034; EG-NEXT: BFE_INT T7.X, T0.Y, 0.0, literal.x, 5035; EG-NEXT: ASHR T5.Y, PV.X, literal.y, 5036; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 5037; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 5038; EG-NEXT: LSHR T8.X, PV.W, 
literal.x, 5039; EG-NEXT: ASHR * T7.Y, PV.X, literal.y, 5040; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) 5041 %load = load <4 x i16>, <4 x i16> addrspace(4)* %in 5042 %ext = sext <4 x i16> %load to <4 x i64> 5043 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out 5044 ret void 5045} 5046 5047define amdgpu_kernel void @constant_zextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 { 5048; GCN-NOHSA-SI-LABEL: constant_zextload_v8i16_to_v8i64: 5049; GCN-NOHSA-SI: ; %bb.0: 5050; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 5051; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5052; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 5053; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 5054; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0 5055; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 5056; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v1 5057; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5058; GCN-NOHSA-SI-NEXT: s_lshr_b32 s8, s5, 16 5059; GCN-NOHSA-SI-NEXT: s_lshr_b32 s9, s7, 16 5060; GCN-NOHSA-SI-NEXT: s_lshr_b32 s10, s6, 16 5061; GCN-NOHSA-SI-NEXT: s_lshr_b32 s11, s4, 16 5062; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, 0xffff 5063; GCN-NOHSA-SI-NEXT: s_and_b32 s6, s6, 0xffff 5064; GCN-NOHSA-SI-NEXT: s_and_b32 s7, s7, 0xffff 5065; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, 0xffff 5066; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s7 5067; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s9 5068; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 5069; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5070; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5 5071; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s8 5072; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 5073; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5074; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 5075; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s10 5076; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 5077; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5078; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 5079; 
GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s11 5080; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 5081; GCN-NOHSA-SI-NEXT: s_endpgm 5082; 5083; GCN-HSA-LABEL: constant_zextload_v8i16_to_v8i64: 5084; GCN-HSA: ; %bb.0: 5085; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5086; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 5087; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1 5088; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5089; GCN-HSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 5090; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5091; GCN-HSA-NEXT: s_lshr_b32 s8, s5, 16 5092; GCN-HSA-NEXT: s_lshr_b32 s2, s7, 16 5093; GCN-HSA-NEXT: s_lshr_b32 s9, s6, 16 5094; GCN-HSA-NEXT: s_lshr_b32 s10, s4, 16 5095; GCN-HSA-NEXT: s_and_b32 s4, s4, 0xffff 5096; GCN-HSA-NEXT: s_and_b32 s6, s6, 0xffff 5097; GCN-HSA-NEXT: s_and_b32 s5, s5, 0xffff 5098; GCN-HSA-NEXT: s_and_b32 s3, s7, 0xffff 5099; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 5100; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 5101; GCN-HSA-NEXT: v_mov_b32_e32 v0, s3 5102; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5103; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5104; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5105; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 5106; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5107; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5108; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5109; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5110; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 5111; GCN-HSA-NEXT: v_mov_b32_e32 v0, s5 5112; GCN-HSA-NEXT: v_mov_b32_e32 v2, s8 5113; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5114; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5115; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5116; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 5117; GCN-HSA-NEXT: v_mov_b32_e32 v2, s9 5118; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5119; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5120; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 5121; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 5122; GCN-HSA-NEXT: v_mov_b32_e32 v2, s10 5123; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 5124; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5125; GCN-HSA-NEXT: s_endpgm 
5126; 5127; GCN-NOHSA-VI-LABEL: constant_zextload_v8i16_to_v8i64: 5128; GCN-NOHSA-VI: ; %bb.0: 5129; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 5130; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0 5131; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, v1 5132; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5133; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 5134; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 5135; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 5136; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5137; GCN-NOHSA-VI-NEXT: s_and_b32 s11, s7, 0xffff 5138; GCN-NOHSA-VI-NEXT: s_lshr_b32 s7, s7, 16 5139; GCN-NOHSA-VI-NEXT: s_and_b32 s10, s6, 0xffff 5140; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s6, 16 5141; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s11 5142; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7 5143; GCN-NOHSA-VI-NEXT: s_and_b32 s9, s5, 0xffff 5144; GCN-NOHSA-VI-NEXT: s_lshr_b32 s5, s5, 16 5145; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 5146; GCN-NOHSA-VI-NEXT: s_and_b32 s8, s4, 0xffff 5147; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10 5148; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s6 5149; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s4, 16 5150; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 5151; GCN-NOHSA-VI-NEXT: s_nop 0 5152; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s9 5153; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 5154; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 5155; GCN-NOHSA-VI-NEXT: s_nop 0 5156; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8 5157; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4 5158; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 5159; GCN-NOHSA-VI-NEXT: s_endpgm 5160; 5161; EG-LABEL: constant_zextload_v8i16_to_v8i64: 5162; EG: ; %bb.0: 5163; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] 5164; EG-NEXT: TEX 0 @8 5165; EG-NEXT: ALU 30, @11, KC0[CB0:0-32], KC1[] 5166; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T14.X, 0 5167; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T13.X, 0 5168; EG-NEXT: 
MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T12.X, 0 5169; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T11.X, 1 5170; EG-NEXT: CF_END 5171; EG-NEXT: Fetch clause starting at 8: 5172; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 5173; EG-NEXT: ALU clause starting at 10: 5174; EG-NEXT: MOV * T7.X, KC0[2].Z, 5175; EG-NEXT: ALU clause starting at 11: 5176; EG-NEXT: LSHR * T8.Z, T7.W, literal.x, 5177; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5178; EG-NEXT: AND_INT T8.X, T7.W, literal.x, 5179; EG-NEXT: MOV T8.Y, 0.0, 5180; EG-NEXT: LSHR T9.Z, T7.Z, literal.y, 5181; EG-NEXT: AND_INT * T9.X, T7.Z, literal.x, 5182; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 5183; EG-NEXT: MOV T9.Y, 0.0, 5184; EG-NEXT: LSHR * T10.Z, T7.Y, literal.x, 5185; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5186; EG-NEXT: AND_INT T10.X, T7.Y, literal.x, 5187; EG-NEXT: MOV T10.Y, 0.0, 5188; EG-NEXT: LSHR T7.Z, T7.X, literal.y, 5189; EG-NEXT: AND_INT * T7.X, T7.X, literal.x, 5190; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 5191; EG-NEXT: MOV T7.Y, 0.0, 5192; EG-NEXT: MOV T8.W, 0.0, 5193; EG-NEXT: MOV * T9.W, 0.0, 5194; EG-NEXT: MOV T10.W, 0.0, 5195; EG-NEXT: MOV * T7.W, 0.0, 5196; EG-NEXT: LSHR T11.X, KC0[2].Y, literal.x, 5197; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5198; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5199; EG-NEXT: LSHR T12.X, PV.W, literal.x, 5200; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5201; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 5202; EG-NEXT: LSHR T13.X, PV.W, literal.x, 5203; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5204; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 5205; EG-NEXT: LSHR * T14.X, PV.W, literal.x, 5206; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5207 %load = load <8 x i16>, <8 x i16> addrspace(4)* %in 5208 %ext = zext <8 x i16> %load to <8 x i64> 5209 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out 5210 ret void 5211} 5212 5213define amdgpu_kernel void @constant_sextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) 
#0 { 5214; GCN-NOHSA-SI-LABEL: constant_sextload_v8i16_to_v8i64: 5215; GCN-NOHSA-SI: ; %bb.0: 5216; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 5217; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5218; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 5219; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 5220; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 5221; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5222; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s7 5223; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s5 5224; GCN-NOHSA-SI-NEXT: s_lshr_b32 s12, s6, 16 5225; GCN-NOHSA-SI-NEXT: s_lshr_b32 s14, s4, 16 5226; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[16:17], s[4:5], 0x100000 5227; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[18:19], s[6:7], 0x100000 5228; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[4:5], s[4:5], 48 5229; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[6:7], s[6:7], 48 5230; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x100000 5231; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x100000 5232; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x100000 5233; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x100000 5234; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s8 5235; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s9 5236; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s6 5237; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s7 5238; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 5239; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5240; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10 5241; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s11 5242; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s4 5243; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s5 5244; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 5245; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5246; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s18 5247; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s19 5248; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s16 5249; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s17 5250; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s12 5251; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s13 5252; GCN-NOHSA-SI-NEXT: 
buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 5253; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s14 5254; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s15 5255; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 5256; GCN-NOHSA-SI-NEXT: s_endpgm 5257; 5258; GCN-HSA-LABEL: constant_sextload_v8i16_to_v8i64: 5259; GCN-HSA: ; %bb.0: 5260; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5261; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5262; GCN-HSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 5263; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5264; GCN-HSA-NEXT: s_mov_b32 s2, s7 5265; GCN-HSA-NEXT: s_mov_b32 s8, s5 5266; GCN-HSA-NEXT: s_lshr_b32 s10, s6, 16 5267; GCN-HSA-NEXT: s_lshr_b32 s12, s4, 16 5268; GCN-HSA-NEXT: s_bfe_i64 s[14:15], s[4:5], 0x100000 5269; GCN-HSA-NEXT: s_bfe_i64 s[16:17], s[6:7], 0x100000 5270; GCN-HSA-NEXT: s_ashr_i64 s[4:5], s[4:5], 48 5271; GCN-HSA-NEXT: s_ashr_i64 s[6:7], s[6:7], 48 5272; GCN-HSA-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x100000 5273; GCN-HSA-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x100000 5274; GCN-HSA-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x100000 5275; GCN-HSA-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x100000 5276; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 5277; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 5278; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 5279; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5280; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5281; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5282; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 5283; GCN-HSA-NEXT: v_mov_b32_e32 v2, s6 5284; GCN-HSA-NEXT: v_mov_b32_e32 v3, s7 5285; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5286; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5287; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5288; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5289; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 5290; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 5291; GCN-HSA-NEXT: v_mov_b32_e32 v1, s9 5292; GCN-HSA-NEXT: v_mov_b32_e32 v2, s4 5293; GCN-HSA-NEXT: v_mov_b32_e32 v3, s5 5294; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5295; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5296; GCN-HSA-NEXT: 
v_mov_b32_e32 v5, s3 5297; GCN-HSA-NEXT: v_mov_b32_e32 v0, s16 5298; GCN-HSA-NEXT: v_mov_b32_e32 v1, s17 5299; GCN-HSA-NEXT: v_mov_b32_e32 v2, s10 5300; GCN-HSA-NEXT: v_mov_b32_e32 v3, s11 5301; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5302; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5303; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 5304; GCN-HSA-NEXT: v_mov_b32_e32 v0, s14 5305; GCN-HSA-NEXT: v_mov_b32_e32 v1, s15 5306; GCN-HSA-NEXT: v_mov_b32_e32 v2, s12 5307; GCN-HSA-NEXT: v_mov_b32_e32 v3, s13 5308; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 5309; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5310; GCN-HSA-NEXT: s_endpgm 5311; 5312; GCN-NOHSA-VI-LABEL: constant_sextload_v8i16_to_v8i64: 5313; GCN-NOHSA-VI: ; %bb.0: 5314; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 5315; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5316; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 5317; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 5318; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 5319; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5320; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[14:15], s[6:7], 0x100000 5321; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s6, 16 5322; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[16:17], s[6:7], 0x100000 5323; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s7 5324; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[18:19], s[6:7], 0x100000 5325; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s7, 16 5326; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000 5327; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s5 5328; GCN-NOHSA-VI-NEXT: s_lshr_b32 s12, s5, 16 5329; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s18 5330; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s19 5331; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s6 5332; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s7 5333; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[8:9], s[4:5], 0x100000 5334; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s4, 16 5335; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x100000 5336; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x100000 5337; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 5338; 
GCN-NOHSA-VI-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000 5339; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s14 5340; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s15 5341; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s16 5342; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s17 5343; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 5344; GCN-NOHSA-VI-NEXT: s_nop 0 5345; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10 5346; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s11 5347; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s12 5348; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s13 5349; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 5350; GCN-NOHSA-VI-NEXT: s_nop 0 5351; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8 5352; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s9 5353; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4 5354; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s5 5355; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 5356; GCN-NOHSA-VI-NEXT: s_endpgm 5357; 5358; EG-LABEL: constant_sextload_v8i16_to_v8i64: 5359; EG: ; %bb.0: 5360; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] 5361; EG-NEXT: TEX 0 @8 5362; EG-NEXT: ALU 33, @11, KC0[CB0:0-32], KC1[] 5363; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T7.X, 0 5364; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T11.X, 0 5365; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T9.X, 0 5366; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T8.X, 1 5367; EG-NEXT: CF_END 5368; EG-NEXT: Fetch clause starting at 8: 5369; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 5370; EG-NEXT: ALU clause starting at 10: 5371; EG-NEXT: MOV * T7.X, KC0[2].Z, 5372; EG-NEXT: ALU clause starting at 11: 5373; EG-NEXT: LSHR T8.X, KC0[2].Y, literal.x, 5374; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5375; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5376; EG-NEXT: LSHR T9.X, PV.W, literal.x, 5377; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.y, 5378; EG-NEXT: ASHR * T10.W, T7.X, literal.z, 5379; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 5380; EG-NEXT: 31(4.344025e-44), 
0(0.000000e+00) 5381; EG-NEXT: LSHR T11.X, PV.W, literal.x, 5382; EG-NEXT: ASHR T10.Z, T7.X, literal.y, 5383; EG-NEXT: ASHR * T12.W, T7.Y, literal.z, 5384; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5385; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 5386; EG-NEXT: BFE_INT T10.X, T7.X, 0.0, literal.x, 5387; EG-NEXT: ASHR T12.Z, T7.Y, literal.x, 5388; EG-NEXT: ASHR * T13.W, T7.Z, literal.y, 5389; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 5390; EG-NEXT: BFE_INT T12.X, T7.Y, 0.0, literal.x, 5391; EG-NEXT: ASHR T10.Y, PV.X, literal.y, 5392; EG-NEXT: ASHR T13.Z, T7.Z, literal.x, 5393; EG-NEXT: ASHR * T14.W, T7.W, literal.y, 5394; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 5395; EG-NEXT: BFE_INT T13.X, T7.Z, 0.0, literal.x, 5396; EG-NEXT: ASHR T12.Y, PV.X, literal.y, 5397; EG-NEXT: ASHR * T14.Z, T7.W, literal.x, 5398; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 5399; EG-NEXT: BFE_INT T14.X, T7.W, 0.0, literal.x, 5400; EG-NEXT: ASHR T13.Y, PV.X, literal.y, 5401; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 5402; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 5403; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 5404; EG-NEXT: LSHR T7.X, PV.W, literal.x, 5405; EG-NEXT: ASHR * T14.Y, PV.X, literal.y, 5406; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) 5407 %load = load <8 x i16>, <8 x i16> addrspace(4)* %in 5408 %ext = sext <8 x i16> %load to <8 x i64> 5409 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out 5410 ret void 5411} 5412 5413define amdgpu_kernel void @constant_zextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 { 5414; GCN-NOHSA-SI-LABEL: constant_zextload_v16i16_to_v16i64: 5415; GCN-NOHSA-SI: ; %bb.0: 5416; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 5417; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5418; GCN-NOHSA-SI-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 5419; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 5420; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0 5421; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 5422; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 
v3, v1 5423; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5424; GCN-NOHSA-SI-NEXT: s_lshr_b32 s12, s5, 16 5425; GCN-NOHSA-SI-NEXT: s_lshr_b32 s13, s7, 16 5426; GCN-NOHSA-SI-NEXT: s_lshr_b32 s14, s11, 16 5427; GCN-NOHSA-SI-NEXT: s_lshr_b32 s15, s9, 16 5428; GCN-NOHSA-SI-NEXT: s_lshr_b32 s16, s8, 16 5429; GCN-NOHSA-SI-NEXT: s_lshr_b32 s17, s10, 16 5430; GCN-NOHSA-SI-NEXT: s_lshr_b32 s18, s6, 16 5431; GCN-NOHSA-SI-NEXT: s_lshr_b32 s19, s4, 16 5432; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, 0xffff 5433; GCN-NOHSA-SI-NEXT: s_and_b32 s6, s6, 0xffff 5434; GCN-NOHSA-SI-NEXT: s_and_b32 s10, s10, 0xffff 5435; GCN-NOHSA-SI-NEXT: s_and_b32 s8, s8, 0xffff 5436; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, 0xffff 5437; GCN-NOHSA-SI-NEXT: s_and_b32 s7, s7, 0xffff 5438; GCN-NOHSA-SI-NEXT: s_and_b32 s9, s9, 0xffff 5439; GCN-NOHSA-SI-NEXT: s_and_b32 s11, s11, 0xffff 5440; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s9 5441; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s15 5442; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 5443; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5444; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s11 5445; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s14 5446; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 5447; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5448; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s7 5449; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s13 5450; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 5451; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5452; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5 5453; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s12 5454; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 5455; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5456; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s8 5457; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s16 5458; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 5459; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5460; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10 5461; 
GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s17 5462; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 5463; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5464; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 5465; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s18 5466; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 5467; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5468; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 5469; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s19 5470; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 5471; GCN-NOHSA-SI-NEXT: s_endpgm 5472; 5473; GCN-HSA-LABEL: constant_zextload_v16i16_to_v16i64: 5474; GCN-HSA: ; %bb.0: 5475; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5476; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 5477; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1 5478; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5479; GCN-HSA-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 5480; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5481; GCN-HSA-NEXT: s_lshr_b32 s12, s5, 16 5482; GCN-HSA-NEXT: s_lshr_b32 s13, s7, 16 5483; GCN-HSA-NEXT: s_lshr_b32 s14, s11, 16 5484; GCN-HSA-NEXT: s_lshr_b32 s2, s9, 16 5485; GCN-HSA-NEXT: s_lshr_b32 s15, s8, 16 5486; GCN-HSA-NEXT: s_lshr_b32 s16, s10, 16 5487; GCN-HSA-NEXT: s_lshr_b32 s17, s6, 16 5488; GCN-HSA-NEXT: s_lshr_b32 s18, s4, 16 5489; GCN-HSA-NEXT: s_and_b32 s4, s4, 0xffff 5490; GCN-HSA-NEXT: s_and_b32 s6, s6, 0xffff 5491; GCN-HSA-NEXT: s_and_b32 s10, s10, 0xffff 5492; GCN-HSA-NEXT: s_and_b32 s8, s8, 0xffff 5493; GCN-HSA-NEXT: s_and_b32 s5, s5, 0xffff 5494; GCN-HSA-NEXT: s_and_b32 s7, s7, 0xffff 5495; GCN-HSA-NEXT: s_and_b32 s11, s11, 0xffff 5496; GCN-HSA-NEXT: s_and_b32 s3, s9, 0xffff 5497; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 5498; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x50 5499; GCN-HSA-NEXT: v_mov_b32_e32 v0, s3 5500; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5501; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5502; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5503; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70 5504; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5505; GCN-HSA-NEXT: 
flat_store_dwordx4 v[4:5], v[0:3] 5506; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5507; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5508; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 5509; GCN-HSA-NEXT: v_mov_b32_e32 v0, s11 5510; GCN-HSA-NEXT: v_mov_b32_e32 v2, s14 5511; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5512; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5513; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5514; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5515; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 5516; GCN-HSA-NEXT: v_mov_b32_e32 v0, s7 5517; GCN-HSA-NEXT: v_mov_b32_e32 v2, s13 5518; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5519; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5520; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5521; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5522; GCN-HSA-NEXT: s_add_u32 s2, s0, 64 5523; GCN-HSA-NEXT: v_mov_b32_e32 v0, s5 5524; GCN-HSA-NEXT: v_mov_b32_e32 v2, s12 5525; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5526; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5527; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5528; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5529; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60 5530; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 5531; GCN-HSA-NEXT: v_mov_b32_e32 v2, s15 5532; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5533; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5534; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5535; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5536; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 5537; GCN-HSA-NEXT: v_mov_b32_e32 v0, s10 5538; GCN-HSA-NEXT: v_mov_b32_e32 v2, s16 5539; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5540; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5541; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5542; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 5543; GCN-HSA-NEXT: v_mov_b32_e32 v2, s17 5544; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5545; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5546; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 5547; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 5548; GCN-HSA-NEXT: v_mov_b32_e32 v2, s18 5549; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 5550; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5551; 
GCN-HSA-NEXT: s_endpgm 5552; 5553; GCN-NOHSA-VI-LABEL: constant_zextload_v16i16_to_v16i64: 5554; GCN-NOHSA-VI: ; %bb.0: 5555; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 5556; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0 5557; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, v1 5558; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5559; GCN-NOHSA-VI-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 5560; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 5561; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 5562; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5563; GCN-NOHSA-VI-NEXT: s_and_b32 s19, s9, 0xffff 5564; GCN-NOHSA-VI-NEXT: s_lshr_b32 s9, s9, 16 5565; GCN-NOHSA-VI-NEXT: s_and_b32 s18, s8, 0xffff 5566; GCN-NOHSA-VI-NEXT: s_lshr_b32 s8, s8, 16 5567; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s19 5568; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s9 5569; GCN-NOHSA-VI-NEXT: s_and_b32 s17, s11, 0xffff 5570; GCN-NOHSA-VI-NEXT: s_lshr_b32 s11, s11, 16 5571; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 5572; GCN-NOHSA-VI-NEXT: s_and_b32 s16, s10, 0xffff 5573; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s18 5574; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s8 5575; GCN-NOHSA-VI-NEXT: s_lshr_b32 s10, s10, 16 5576; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 5577; GCN-NOHSA-VI-NEXT: s_and_b32 s15, s7, 0xffff 5578; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s17 5579; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s11 5580; GCN-NOHSA-VI-NEXT: s_lshr_b32 s7, s7, 16 5581; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 5582; GCN-NOHSA-VI-NEXT: s_and_b32 s14, s6, 0xffff 5583; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s16 5584; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s10 5585; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s6, 16 5586; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 5587; GCN-NOHSA-VI-NEXT: s_and_b32 s13, s5, 0xffff 5588; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s15 5589; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7 5590; GCN-NOHSA-VI-NEXT: s_lshr_b32 s5, s5, 16 5591; 
GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 5592; GCN-NOHSA-VI-NEXT: s_and_b32 s12, s4, 0xffff 5593; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s14 5594; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s6 5595; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s4, 16 5596; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 5597; GCN-NOHSA-VI-NEXT: s_nop 0 5598; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s13 5599; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 5600; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 5601; GCN-NOHSA-VI-NEXT: s_nop 0 5602; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s12 5603; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4 5604; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 5605; GCN-NOHSA-VI-NEXT: s_endpgm 5606; 5607; EG-LABEL: constant_zextload_v16i16_to_v16i64: 5608; EG: ; %bb.0: 5609; EG-NEXT: ALU 0, @16, KC0[CB0:0-32], KC1[] 5610; EG-NEXT: TEX 1 @12 5611; EG-NEXT: ALU 62, @17, KC0[CB0:0-32], KC1[] 5612; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T26.X, 0 5613; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T25.X, 0 5614; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T24.X, 0 5615; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T23.X, 0 5616; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T22.X, 0 5617; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T21.X, 0 5618; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T20.X, 0 5619; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T19.X, 1 5620; EG-NEXT: CF_END 5621; EG-NEXT: Fetch clause starting at 12: 5622; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1 5623; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1 5624; EG-NEXT: ALU clause starting at 16: 5625; EG-NEXT: MOV * T11.X, KC0[2].Z, 5626; EG-NEXT: ALU clause starting at 17: 5627; EG-NEXT: LSHR * T13.Z, T12.W, literal.x, 5628; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5629; EG-NEXT: AND_INT T13.X, T12.W, literal.x, 5630; EG-NEXT: MOV T13.Y, 0.0, 5631; EG-NEXT: LSHR T14.Z, T12.Z, literal.y, 5632; EG-NEXT: 
AND_INT * T14.X, T12.Z, literal.x, 5633; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 5634; EG-NEXT: MOV T14.Y, 0.0, 5635; EG-NEXT: LSHR * T15.Z, T12.Y, literal.x, 5636; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5637; EG-NEXT: AND_INT T15.X, T12.Y, literal.x, 5638; EG-NEXT: MOV T15.Y, 0.0, 5639; EG-NEXT: LSHR T12.Z, T12.X, literal.y, 5640; EG-NEXT: AND_INT * T12.X, T12.X, literal.x, 5641; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 5642; EG-NEXT: MOV T12.Y, 0.0, 5643; EG-NEXT: LSHR * T16.Z, T11.W, literal.x, 5644; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5645; EG-NEXT: AND_INT T16.X, T11.W, literal.x, 5646; EG-NEXT: MOV T16.Y, 0.0, 5647; EG-NEXT: LSHR T17.Z, T11.Z, literal.y, 5648; EG-NEXT: AND_INT * T17.X, T11.Z, literal.x, 5649; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 5650; EG-NEXT: MOV T17.Y, 0.0, 5651; EG-NEXT: LSHR * T18.Z, T11.Y, literal.x, 5652; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5653; EG-NEXT: AND_INT T18.X, T11.Y, literal.x, 5654; EG-NEXT: MOV T18.Y, 0.0, 5655; EG-NEXT: LSHR T11.Z, T11.X, literal.y, 5656; EG-NEXT: AND_INT * T11.X, T11.X, literal.x, 5657; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 5658; EG-NEXT: MOV T11.Y, 0.0, 5659; EG-NEXT: MOV T13.W, 0.0, 5660; EG-NEXT: MOV * T14.W, 0.0, 5661; EG-NEXT: MOV T15.W, 0.0, 5662; EG-NEXT: MOV * T12.W, 0.0, 5663; EG-NEXT: MOV T16.W, 0.0, 5664; EG-NEXT: MOV * T17.W, 0.0, 5665; EG-NEXT: MOV T18.W, 0.0, 5666; EG-NEXT: MOV * T11.W, 0.0, 5667; EG-NEXT: LSHR T19.X, KC0[2].Y, literal.x, 5668; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5669; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5670; EG-NEXT: LSHR T20.X, PV.W, literal.x, 5671; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5672; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 5673; EG-NEXT: LSHR T21.X, PV.W, literal.x, 5674; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5675; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 5676; EG-NEXT: LSHR T22.X, PV.W, literal.x, 5677; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5678; EG-NEXT: 
2(2.802597e-45), 64(8.968310e-44) 5679; EG-NEXT: LSHR T23.X, PV.W, literal.x, 5680; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5681; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 5682; EG-NEXT: LSHR T24.X, PV.W, literal.x, 5683; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5684; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 5685; EG-NEXT: LSHR T25.X, PV.W, literal.x, 5686; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5687; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) 5688; EG-NEXT: LSHR * T26.X, PV.W, literal.x, 5689; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5690 %load = load <16 x i16>, <16 x i16> addrspace(4)* %in 5691 %ext = zext <16 x i16> %load to <16 x i64> 5692 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out 5693 ret void 5694} 5695 5696define amdgpu_kernel void @constant_sextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 { 5697; GCN-NOHSA-SI-LABEL: constant_sextload_v16i16_to_v16i64: 5698; GCN-NOHSA-SI: ; %bb.0: 5699; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 5700; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5701; GCN-NOHSA-SI-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 5702; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 5703; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 5704; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5705; GCN-NOHSA-SI-NEXT: s_mov_b32 s12, s11 5706; GCN-NOHSA-SI-NEXT: s_mov_b32 s14, s9 5707; GCN-NOHSA-SI-NEXT: s_mov_b32 s16, s7 5708; GCN-NOHSA-SI-NEXT: s_mov_b32 s18, s5 5709; GCN-NOHSA-SI-NEXT: s_lshr_b32 s20, s10, 16 5710; GCN-NOHSA-SI-NEXT: s_lshr_b32 s22, s8, 16 5711; GCN-NOHSA-SI-NEXT: s_lshr_b32 s24, s6, 16 5712; GCN-NOHSA-SI-NEXT: s_lshr_b32 s26, s4, 16 5713; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[28:29], s[4:5], 0x100000 5714; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[30:31], s[6:7], 0x100000 5715; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[34:35], s[8:9], 0x100000 5716; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[36:37], s[10:11], 0x100000 5717; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[4:5], s[4:5], 48 5718; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[6:7], 
s[6:7], 48 5719; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[8:9], s[8:9], 48 5720; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[10:11], s[10:11], 48 5721; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x100000 5722; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x100000 5723; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x100000 5724; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x100000 5725; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x100000 5726; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x100000 5727; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x100000 5728; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x100000 5729; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s12 5730; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s13 5731; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s10 5732; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s11 5733; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 5734; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5735; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s14 5736; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s15 5737; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s8 5738; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s9 5739; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 5740; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5741; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s16 5742; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s17 5743; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s6 5744; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s7 5745; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 5746; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5747; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s18 5748; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s19 5749; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s4 5750; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s5 5751; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 5752; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5753; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s36 5754; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s37 5755; GCN-NOHSA-SI-NEXT: 
v_mov_b32_e32 v4, s34 5756; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s35 5757; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s30 5758; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s31 5759; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s28 5760; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s29 5761; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s20 5762; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s21 5763; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 5764; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s22 5765; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s23 5766; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:64 5767; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s24 5768; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s25 5769; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32 5770; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s26 5771; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s27 5772; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 5773; GCN-NOHSA-SI-NEXT: s_endpgm 5774; 5775; GCN-HSA-LABEL: constant_sextload_v16i16_to_v16i64: 5776; GCN-HSA: ; %bb.0: 5777; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5778; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5779; GCN-HSA-NEXT: s_load_dwordx8 s[8:15], s[2:3], 0x0 5780; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5781; GCN-HSA-NEXT: s_mov_b32 s6, s15 5782; GCN-HSA-NEXT: s_mov_b32 s16, s13 5783; GCN-HSA-NEXT: s_mov_b32 s18, s11 5784; GCN-HSA-NEXT: s_mov_b32 s20, s9 5785; GCN-HSA-NEXT: s_lshr_b32 s22, s14, 16 5786; GCN-HSA-NEXT: s_lshr_b32 s24, s12, 16 5787; GCN-HSA-NEXT: s_lshr_b32 s26, s10, 16 5788; GCN-HSA-NEXT: s_lshr_b32 s28, s8, 16 5789; GCN-HSA-NEXT: s_bfe_i64 s[34:35], s[14:15], 0x100000 5790; GCN-HSA-NEXT: s_ashr_i64 s[14:15], s[14:15], 48 5791; GCN-HSA-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000 5792; GCN-HSA-NEXT: s_bfe_i64 s[2:3], s[8:9], 0x100000 5793; GCN-HSA-NEXT: s_bfe_i64 s[4:5], s[10:11], 0x100000 5794; GCN-HSA-NEXT: s_bfe_i64 s[30:31], s[12:13], 0x100000 5795; GCN-HSA-NEXT: s_ashr_i64 s[8:9], s[8:9], 48 5796; GCN-HSA-NEXT: 
s_ashr_i64 s[10:11], s[10:11], 48 5797; GCN-HSA-NEXT: s_ashr_i64 s[12:13], s[12:13], 48 5798; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 5799; GCN-HSA-NEXT: v_mov_b32_e32 v1, s7 5800; GCN-HSA-NEXT: v_mov_b32_e32 v2, s14 5801; GCN-HSA-NEXT: v_mov_b32_e32 v3, s15 5802; GCN-HSA-NEXT: s_bfe_i64 s[6:7], s[28:29], 0x100000 5803; GCN-HSA-NEXT: s_bfe_i64 s[14:15], s[26:27], 0x100000 5804; GCN-HSA-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x100000 5805; GCN-HSA-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x100000 5806; GCN-HSA-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x100000 5807; GCN-HSA-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x100000 5808; GCN-HSA-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x100000 5809; GCN-HSA-NEXT: s_add_u32 s26, s0, 0x70 5810; GCN-HSA-NEXT: s_addc_u32 s27, s1, 0 5811; GCN-HSA-NEXT: v_mov_b32_e32 v8, s26 5812; GCN-HSA-NEXT: v_mov_b32_e32 v6, s12 5813; GCN-HSA-NEXT: s_add_u32 s12, s0, 0x50 5814; GCN-HSA-NEXT: v_mov_b32_e32 v9, s27 5815; GCN-HSA-NEXT: v_mov_b32_e32 v7, s13 5816; GCN-HSA-NEXT: s_addc_u32 s13, s1, 0 5817; GCN-HSA-NEXT: v_mov_b32_e32 v10, s12 5818; GCN-HSA-NEXT: v_mov_b32_e32 v4, s16 5819; GCN-HSA-NEXT: v_mov_b32_e32 v5, s17 5820; GCN-HSA-NEXT: v_mov_b32_e32 v11, s13 5821; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3] 5822; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[4:7] 5823; GCN-HSA-NEXT: v_mov_b32_e32 v2, s10 5824; GCN-HSA-NEXT: s_add_u32 s10, s0, 48 5825; GCN-HSA-NEXT: v_mov_b32_e32 v3, s11 5826; GCN-HSA-NEXT: s_addc_u32 s11, s1, 0 5827; GCN-HSA-NEXT: v_mov_b32_e32 v4, s10 5828; GCN-HSA-NEXT: v_mov_b32_e32 v0, s18 5829; GCN-HSA-NEXT: v_mov_b32_e32 v1, s19 5830; GCN-HSA-NEXT: v_mov_b32_e32 v5, s11 5831; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5832; GCN-HSA-NEXT: s_nop 0 5833; GCN-HSA-NEXT: v_mov_b32_e32 v2, s8 5834; GCN-HSA-NEXT: s_add_u32 s8, s0, 16 5835; GCN-HSA-NEXT: v_mov_b32_e32 v3, s9 5836; GCN-HSA-NEXT: s_addc_u32 s9, s1, 0 5837; GCN-HSA-NEXT: v_mov_b32_e32 v4, s8 5838; GCN-HSA-NEXT: v_mov_b32_e32 v0, s20 5839; GCN-HSA-NEXT: v_mov_b32_e32 v1, s21 5840; 
GCN-HSA-NEXT: v_mov_b32_e32 v5, s9 5841; GCN-HSA-NEXT: s_add_u32 s8, s0, 0x60 5842; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5843; GCN-HSA-NEXT: s_addc_u32 s9, s1, 0 5844; GCN-HSA-NEXT: v_mov_b32_e32 v4, s8 5845; GCN-HSA-NEXT: v_mov_b32_e32 v0, s34 5846; GCN-HSA-NEXT: v_mov_b32_e32 v1, s35 5847; GCN-HSA-NEXT: v_mov_b32_e32 v2, s22 5848; GCN-HSA-NEXT: v_mov_b32_e32 v3, s23 5849; GCN-HSA-NEXT: v_mov_b32_e32 v5, s9 5850; GCN-HSA-NEXT: s_add_u32 s8, s0, 64 5851; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5852; GCN-HSA-NEXT: s_addc_u32 s9, s1, 0 5853; GCN-HSA-NEXT: v_mov_b32_e32 v4, s8 5854; GCN-HSA-NEXT: v_mov_b32_e32 v0, s30 5855; GCN-HSA-NEXT: v_mov_b32_e32 v1, s31 5856; GCN-HSA-NEXT: v_mov_b32_e32 v2, s24 5857; GCN-HSA-NEXT: v_mov_b32_e32 v3, s25 5858; GCN-HSA-NEXT: v_mov_b32_e32 v5, s9 5859; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5860; GCN-HSA-NEXT: s_nop 0 5861; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 5862; GCN-HSA-NEXT: s_add_u32 s4, s0, 32 5863; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 5864; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 5865; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 5866; GCN-HSA-NEXT: v_mov_b32_e32 v2, s14 5867; GCN-HSA-NEXT: v_mov_b32_e32 v3, s15 5868; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 5869; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5870; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 5871; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 5872; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 5873; GCN-HSA-NEXT: v_mov_b32_e32 v2, s6 5874; GCN-HSA-NEXT: v_mov_b32_e32 v3, s7 5875; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 5876; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5877; GCN-HSA-NEXT: s_endpgm 5878; 5879; GCN-NOHSA-VI-LABEL: constant_sextload_v16i16_to_v16i64: 5880; GCN-NOHSA-VI: ; %bb.0: 5881; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x24 5882; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5883; GCN-NOHSA-VI-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0 5884; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, 0xf000 5885; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, -1 5886; GCN-NOHSA-VI-NEXT: s_waitcnt 
lgkmcnt(0) 5887; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[30:31], s[4:5], 0x100000 5888; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s4, 16 5889; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[34:35], s[4:5], 0x100000 5890; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s5 5891; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[24:25], s[6:7], 0x100000 5892; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s6, 16 5893; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[36:37], s[4:5], 0x100000 5894; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s5, 16 5895; GCN-NOHSA-VI-NEXT: s_mov_b32 s14, s1 5896; GCN-NOHSA-VI-NEXT: s_lshr_b32 s18, s1, 16 5897; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[26:27], s[6:7], 0x100000 5898; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s7 5899; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000 5900; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[16:17], s[14:15], 0x100000 5901; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[14:15], s[18:19], 0x100000 5902; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[18:19], s[2:3], 0x100000 5903; GCN-NOHSA-VI-NEXT: s_lshr_b32 s2, s2, 16 5904; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[28:29], s[6:7], 0x100000 5905; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s7, 16 5906; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s36 5907; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s37 5908; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4 5909; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s5 5910; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[20:21], s[2:3], 0x100000 5911; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, s3 5912; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000 5913; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:80 5914; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[22:23], s[2:3], 0x100000 5915; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s30 5916; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s31 5917; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s34 5918; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s35 5919; GCN-NOHSA-VI-NEXT: s_lshr_b32 s2, s3, 16 5920; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:64 5921; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x100000 5922; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s28 5923; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 
s29 5924; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s6 5925; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s7 5926; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:112 5927; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[12:13], s[0:1], 0x100000 5928; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s24 5929; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s25 5930; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s26 5931; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s27 5932; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:96 5933; GCN-NOHSA-VI-NEXT: s_lshr_b32 s0, s0, 16 5934; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s22 5935; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s23 5936; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s2 5937; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s3 5938; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:48 5939; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x100000 5940; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s18 5941; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s19 5942; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s20 5943; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s21 5944; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:32 5945; GCN-NOHSA-VI-NEXT: s_nop 0 5946; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s16 5947; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s17 5948; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s14 5949; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s15 5950; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16 5951; GCN-NOHSA-VI-NEXT: s_nop 0 5952; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s12 5953; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s13 5954; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s0 5955; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s1 5956; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 5957; GCN-NOHSA-VI-NEXT: s_endpgm 5958; 5959; EG-LABEL: constant_sextload_v16i16_to_v16i64: 5960; EG: ; %bb.0: 5961; EG-NEXT: ALU 0, @16, KC0[CB0:0-32], KC1[] 5962; EG-NEXT: TEX 1 @12 5963; EG-NEXT: ALU 65, @17, KC0[CB0:0-32], KC1[] 5964; EG-NEXT: 
MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T12.X, 0 5965; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T20.X, 0 5966; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T18.X, 0 5967; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T17.X, 0 5968; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T16.X, 0 5969; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T15.X, 0 5970; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T14.X, 0 5971; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T13.X, 1 5972; EG-NEXT: CF_END 5973; EG-NEXT: Fetch clause starting at 12: 5974; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1 5975; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1 5976; EG-NEXT: ALU clause starting at 16: 5977; EG-NEXT: MOV * T11.X, KC0[2].Z, 5978; EG-NEXT: ALU clause starting at 17: 5979; EG-NEXT: LSHR T13.X, KC0[2].Y, literal.x, 5980; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5981; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5982; EG-NEXT: LSHR T14.X, PV.W, literal.x, 5983; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5984; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 5985; EG-NEXT: LSHR T15.X, PV.W, literal.x, 5986; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5987; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 5988; EG-NEXT: LSHR T16.X, PV.W, literal.x, 5989; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5990; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 5991; EG-NEXT: LSHR T17.X, PV.W, literal.x, 5992; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5993; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 5994; EG-NEXT: LSHR T18.X, PV.W, literal.x, 5995; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.y, 5996; EG-NEXT: ASHR * T19.W, T11.X, literal.z, 5997; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 5998; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 5999; EG-NEXT: LSHR T20.X, PV.W, literal.x, 6000; EG-NEXT: ASHR T19.Z, T11.X, literal.y, 6001; EG-NEXT: ASHR * T21.W, T11.Y, literal.z, 6002; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6003; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 6004; EG-NEXT: BFE_INT 
T19.X, T11.X, 0.0, literal.x, 6005; EG-NEXT: ASHR T21.Z, T11.Y, literal.x, 6006; EG-NEXT: ASHR * T22.W, T11.Z, literal.y, 6007; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6008; EG-NEXT: BFE_INT T21.X, T11.Y, 0.0, literal.x, 6009; EG-NEXT: ASHR T19.Y, PV.X, literal.y, 6010; EG-NEXT: ASHR T22.Z, T11.Z, literal.x, 6011; EG-NEXT: ASHR * T23.W, T11.W, literal.y, 6012; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6013; EG-NEXT: BFE_INT T22.X, T11.Z, 0.0, literal.x, 6014; EG-NEXT: ASHR T21.Y, PV.X, literal.y, 6015; EG-NEXT: ASHR T23.Z, T11.W, literal.x, 6016; EG-NEXT: ASHR * T24.W, T12.X, literal.y, 6017; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6018; EG-NEXT: BFE_INT T23.X, T11.W, 0.0, literal.x, 6019; EG-NEXT: ASHR T22.Y, PV.X, literal.y, 6020; EG-NEXT: ASHR T24.Z, T12.X, literal.x, 6021; EG-NEXT: ASHR * T11.W, T12.Y, literal.y, 6022; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6023; EG-NEXT: BFE_INT T24.X, T12.X, 0.0, literal.x, 6024; EG-NEXT: ASHR T23.Y, PV.X, literal.y, 6025; EG-NEXT: ASHR T11.Z, T12.Y, literal.x, 6026; EG-NEXT: ASHR * T25.W, T12.Z, literal.y, 6027; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6028; EG-NEXT: BFE_INT T11.X, T12.Y, 0.0, literal.x, 6029; EG-NEXT: ASHR T24.Y, PV.X, literal.y, 6030; EG-NEXT: ASHR T25.Z, T12.Z, literal.x, 6031; EG-NEXT: ASHR * T26.W, T12.W, literal.y, 6032; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6033; EG-NEXT: BFE_INT T25.X, T12.Z, 0.0, literal.x, 6034; EG-NEXT: ASHR T11.Y, PV.X, literal.y, 6035; EG-NEXT: ASHR * T26.Z, T12.W, literal.x, 6036; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6037; EG-NEXT: BFE_INT T26.X, T12.W, 0.0, literal.x, 6038; EG-NEXT: ASHR T25.Y, PV.X, literal.y, 6039; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 6040; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6041; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00) 6042; EG-NEXT: LSHR T12.X, PV.W, literal.x, 6043; EG-NEXT: ASHR * T26.Y, PV.X, literal.y, 6044; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) 6045 %load = load <16 x i16>, <16 x i16> 
addrspace(4)* %in 6046 %ext = sext <16 x i16> %load to <16 x i64> 6047 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out 6048 ret void 6049} 6050
; constant_zextload_v32i16_to_v32i64 loads a <32 x i16> vector through the
; addrspace(4) pointer %in, zero-extends every element to i64 and stores the
; resulting <32 x i64> vector through the addrspace(1) pointer %out.
; The GCN-NOHSA-SI, GCN-HSA, GCN-NOHSA-VI and EG check lines that follow are
; autogenerated (see the NOTE at the top of this file); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
; In the GCN expectations the zero extension appears as s_lshr_b32 by 16 and
; s_and_b32 with 0xffff on each loaded dword, and the upper half of every i64
; comes from v1/v3, which are set to 0 once and reused by all the stores.
6051define amdgpu_kernel void @constant_zextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 { 6052; GCN-NOHSA-SI-LABEL: constant_zextload_v32i16_to_v32i64: 6053; GCN-NOHSA-SI: ; %bb.0: 6054; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 6055; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 6056; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[4:19], s[2:3], 0x0 6057; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 6058; GCN-NOHSA-SI-NEXT: s_lshr_b32 s20, s5, 16 6059; GCN-NOHSA-SI-NEXT: s_lshr_b32 s21, s7, 16 6060; GCN-NOHSA-SI-NEXT: s_lshr_b32 s22, s9, 16 6061; GCN-NOHSA-SI-NEXT: s_lshr_b32 s23, s11, 16 6062; GCN-NOHSA-SI-NEXT: s_lshr_b32 s24, s13, 16 6063; GCN-NOHSA-SI-NEXT: s_lshr_b32 s25, s15, 16 6064; GCN-NOHSA-SI-NEXT: s_lshr_b32 s26, s17, 16 6065; GCN-NOHSA-SI-NEXT: s_lshr_b32 s27, s19, 16 6066; GCN-NOHSA-SI-NEXT: s_lshr_b32 s28, s18, 16 6067; GCN-NOHSA-SI-NEXT: s_lshr_b32 s29, s16, 16 6068; GCN-NOHSA-SI-NEXT: s_lshr_b32 s30, s14, 16 6069; GCN-NOHSA-SI-NEXT: s_lshr_b32 s31, s12, 16 6070; GCN-NOHSA-SI-NEXT: s_lshr_b32 s33, s10, 16 6071; GCN-NOHSA-SI-NEXT: s_lshr_b32 s34, s8, 16 6072; GCN-NOHSA-SI-NEXT: s_lshr_b32 s35, s6, 16 6073; GCN-NOHSA-SI-NEXT: s_lshr_b32 s36, s4, 16 6074; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, 0xffff 6075; GCN-NOHSA-SI-NEXT: s_and_b32 s6, s6, 0xffff 6076; GCN-NOHSA-SI-NEXT: s_and_b32 s8, s8, 0xffff 6077; GCN-NOHSA-SI-NEXT: s_and_b32 s10, s10, 0xffff 6078; GCN-NOHSA-SI-NEXT: s_and_b32 s12, s12, 0xffff 6079; GCN-NOHSA-SI-NEXT: s_and_b32 s14, s14, 0xffff 6080; GCN-NOHSA-SI-NEXT: s_and_b32 s16, s16, 0xffff 6081; GCN-NOHSA-SI-NEXT: s_and_b32 s18, s18, 0xffff 6082; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, 0xffff 6083; GCN-NOHSA-SI-NEXT: s_and_b32 s7, s7, 0xffff 6084; GCN-NOHSA-SI-NEXT: s_and_b32 s9, s9, 0xffff 6085; GCN-NOHSA-SI-NEXT: s_and_b32 s11, s11, 0xffff 6086;
GCN-NOHSA-SI-NEXT: s_and_b32 s13, s13, 0xffff 6087; GCN-NOHSA-SI-NEXT: s_and_b32 s15, s15, 0xffff 6088; GCN-NOHSA-SI-NEXT: s_and_b32 s17, s17, 0xffff 6089; GCN-NOHSA-SI-NEXT: s_and_b32 s19, s19, 0xffff 6090; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 6091; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0 6092; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 6093; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v1 6094; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s19 6095; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s27 6096; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240 6097; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6098; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s17 6099; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s26 6100; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208 6101; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6102; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s15 6103; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s25 6104; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176 6105; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6106; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s13 6107; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s24 6108; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144 6109; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6110; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s11 6111; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s23 6112; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 6113; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6114; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s9 6115; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s22 6116; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 6117; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6118; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s7 6119; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s21 6120; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 6121; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6122; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5 6123; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, 
s20 6124; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 6125; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6126; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s18 6127; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s28 6128; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224 6129; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6130; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s16 6131; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s29 6132; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192 6133; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6134; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s14 6135; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s30 6136; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160 6137; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6138; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s12 6139; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s31 6140; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128 6141; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6142; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10 6143; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s33 6144; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 6145; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6146; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s8 6147; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s34 6148; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 6149; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6150; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 6151; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s35 6152; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 6153; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6154; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 6155; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s36 6156; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 6157; GCN-NOHSA-SI-NEXT: s_endpgm 6158; 6159; GCN-HSA-LABEL: constant_zextload_v32i16_to_v32i64: 6160; GCN-HSA: ; %bb.0: 6161; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 6162; 
GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 6163; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1 6164; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 6165; GCN-HSA-NEXT: s_load_dwordx16 s[4:19], s[2:3], 0x0 6166; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 6167; GCN-HSA-NEXT: s_lshr_b32 s3, s5, 16 6168; GCN-HSA-NEXT: s_lshr_b32 s20, s7, 16 6169; GCN-HSA-NEXT: s_lshr_b32 s21, s9, 16 6170; GCN-HSA-NEXT: s_lshr_b32 s22, s11, 16 6171; GCN-HSA-NEXT: s_lshr_b32 s23, s13, 16 6172; GCN-HSA-NEXT: s_lshr_b32 s24, s15, 16 6173; GCN-HSA-NEXT: s_lshr_b32 s25, s17, 16 6174; GCN-HSA-NEXT: s_lshr_b32 s26, s19, 16 6175; GCN-HSA-NEXT: s_lshr_b32 s27, s18, 16 6176; GCN-HSA-NEXT: s_lshr_b32 s28, s16, 16 6177; GCN-HSA-NEXT: s_lshr_b32 s29, s14, 16 6178; GCN-HSA-NEXT: s_lshr_b32 s30, s12, 16 6179; GCN-HSA-NEXT: s_lshr_b32 s31, s10, 16 6180; GCN-HSA-NEXT: s_lshr_b32 s33, s8, 16 6181; GCN-HSA-NEXT: s_lshr_b32 s34, s6, 16 6182; GCN-HSA-NEXT: s_lshr_b32 s2, s4, 16 6183; GCN-HSA-NEXT: s_and_b32 s35, s4, 0xffff 6184; GCN-HSA-NEXT: s_and_b32 s6, s6, 0xffff 6185; GCN-HSA-NEXT: s_and_b32 s8, s8, 0xffff 6186; GCN-HSA-NEXT: s_and_b32 s10, s10, 0xffff 6187; GCN-HSA-NEXT: s_and_b32 s12, s12, 0xffff 6188; GCN-HSA-NEXT: s_and_b32 s14, s14, 0xffff 6189; GCN-HSA-NEXT: s_and_b32 s16, s16, 0xffff 6190; GCN-HSA-NEXT: s_and_b32 s18, s18, 0xffff 6191; GCN-HSA-NEXT: s_and_b32 s36, s5, 0xffff 6192; GCN-HSA-NEXT: s_and_b32 s7, s7, 0xffff 6193; GCN-HSA-NEXT: s_and_b32 s9, s9, 0xffff 6194; GCN-HSA-NEXT: s_and_b32 s11, s11, 0xffff 6195; GCN-HSA-NEXT: s_and_b32 s13, s13, 0xffff 6196; GCN-HSA-NEXT: s_and_b32 s15, s15, 0xffff 6197; GCN-HSA-NEXT: s_and_b32 s17, s17, 0xffff 6198; GCN-HSA-NEXT: s_and_b32 s19, s19, 0xffff 6199; GCN-HSA-NEXT: s_add_u32 s4, s0, 0xf0 6200; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 6201; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 6202; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 6203; GCN-HSA-NEXT: s_add_u32 s4, s0, 0xd0 6204; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 6205; GCN-HSA-NEXT: v_mov_b32_e32 v7, s5 6206; GCN-HSA-NEXT: v_mov_b32_e32 v6, s4 6207; 
GCN-HSA-NEXT: s_add_u32 s4, s0, 0xb0 6208; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 6209; GCN-HSA-NEXT: v_mov_b32_e32 v9, s5 6210; GCN-HSA-NEXT: v_mov_b32_e32 v8, s4 6211; GCN-HSA-NEXT: s_add_u32 s4, s0, 0x90 6212; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 6213; GCN-HSA-NEXT: v_mov_b32_e32 v0, s19 6214; GCN-HSA-NEXT: v_mov_b32_e32 v2, s26 6215; GCN-HSA-NEXT: v_mov_b32_e32 v11, s5 6216; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6217; GCN-HSA-NEXT: v_mov_b32_e32 v10, s4 6218; GCN-HSA-NEXT: v_mov_b32_e32 v0, s17 6219; GCN-HSA-NEXT: v_mov_b32_e32 v2, s25 6220; GCN-HSA-NEXT: flat_store_dwordx4 v[6:7], v[0:3] 6221; GCN-HSA-NEXT: s_add_u32 s4, s0, 0x70 6222; GCN-HSA-NEXT: v_mov_b32_e32 v0, s15 6223; GCN-HSA-NEXT: v_mov_b32_e32 v2, s24 6224; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3] 6225; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 6226; GCN-HSA-NEXT: v_mov_b32_e32 v0, s13 6227; GCN-HSA-NEXT: v_mov_b32_e32 v2, s23 6228; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 6229; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[0:3] 6230; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 6231; GCN-HSA-NEXT: v_mov_b32_e32 v0, s11 6232; GCN-HSA-NEXT: v_mov_b32_e32 v2, s22 6233; GCN-HSA-NEXT: s_add_u32 s4, s0, 0x50 6234; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6235; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 6236; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 6237; GCN-HSA-NEXT: v_mov_b32_e32 v0, s9 6238; GCN-HSA-NEXT: v_mov_b32_e32 v2, s21 6239; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 6240; GCN-HSA-NEXT: s_add_u32 s4, s0, 48 6241; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6242; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 6243; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 6244; GCN-HSA-NEXT: v_mov_b32_e32 v0, s7 6245; GCN-HSA-NEXT: v_mov_b32_e32 v2, s20 6246; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 6247; GCN-HSA-NEXT: s_add_u32 s4, s0, 16 6248; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6249; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 6250; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 6251; GCN-HSA-NEXT: v_mov_b32_e32 v0, s36 6252; GCN-HSA-NEXT: v_mov_b32_e32 v2, s3 
6253; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 6254; GCN-HSA-NEXT: s_add_u32 s4, s0, 0xe0 6255; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6256; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 6257; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 6258; GCN-HSA-NEXT: v_mov_b32_e32 v0, s18 6259; GCN-HSA-NEXT: v_mov_b32_e32 v2, s27 6260; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 6261; GCN-HSA-NEXT: s_add_u32 s4, s0, 0xc0 6262; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6263; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 6264; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 6265; GCN-HSA-NEXT: v_mov_b32_e32 v0, s16 6266; GCN-HSA-NEXT: v_mov_b32_e32 v2, s28 6267; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 6268; GCN-HSA-NEXT: s_add_u32 s4, s0, 0xa0 6269; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6270; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 6271; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 6272; GCN-HSA-NEXT: v_mov_b32_e32 v0, s14 6273; GCN-HSA-NEXT: v_mov_b32_e32 v2, s29 6274; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 6275; GCN-HSA-NEXT: s_add_u32 s4, s0, 0x80 6276; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6277; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 6278; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 6279; GCN-HSA-NEXT: v_mov_b32_e32 v0, s12 6280; GCN-HSA-NEXT: v_mov_b32_e32 v2, s30 6281; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 6282; GCN-HSA-NEXT: s_add_u32 s4, s0, 0x60 6283; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6284; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 6285; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 6286; GCN-HSA-NEXT: v_mov_b32_e32 v0, s10 6287; GCN-HSA-NEXT: v_mov_b32_e32 v2, s31 6288; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 6289; GCN-HSA-NEXT: s_add_u32 s4, s0, 64 6290; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6291; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 6292; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 6293; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 6294; GCN-HSA-NEXT: v_mov_b32_e32 v2, s33 6295; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 6296; GCN-HSA-NEXT: s_add_u32 s4, s0, 32 6297; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6298; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 
6299; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 6300; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 6301; GCN-HSA-NEXT: v_mov_b32_e32 v2, s34 6302; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 6303; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6304; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 6305; GCN-HSA-NEXT: v_mov_b32_e32 v0, s35 6306; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 6307; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 6308; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6309; GCN-HSA-NEXT: s_endpgm 6310; 6311; GCN-NOHSA-VI-LABEL: constant_zextload_v32i16_to_v32i64: 6312; GCN-NOHSA-VI: ; %bb.0: 6313; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[16:19], s[0:1], 0x24 6314; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0 6315; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, v1 6316; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 6317; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 6318; GCN-NOHSA-VI-NEXT: s_mov_b32 s19, 0xf000 6319; GCN-NOHSA-VI-NEXT: s_mov_b32 s18, -1 6320; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 6321; GCN-NOHSA-VI-NEXT: s_lshr_b32 s27, s15, 16 6322; GCN-NOHSA-VI-NEXT: s_and_b32 s15, s15, 0xffff 6323; GCN-NOHSA-VI-NEXT: s_lshr_b32 s26, s13, 16 6324; GCN-NOHSA-VI-NEXT: s_and_b32 s13, s13, 0xffff 6325; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s15 6326; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s27 6327; GCN-NOHSA-VI-NEXT: s_lshr_b32 s25, s11, 16 6328; GCN-NOHSA-VI-NEXT: s_and_b32 s11, s11, 0xffff 6329; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:240 6330; GCN-NOHSA-VI-NEXT: s_lshr_b32 s24, s9, 16 6331; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s13 6332; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s26 6333; GCN-NOHSA-VI-NEXT: s_and_b32 s9, s9, 0xffff 6334; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:208 6335; GCN-NOHSA-VI-NEXT: s_lshr_b32 s23, s7, 16 6336; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s11 6337; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s25 6338; GCN-NOHSA-VI-NEXT: s_and_b32 s7, s7, 0xffff 6339; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:176 6340; 
GCN-NOHSA-VI-NEXT: s_lshr_b32 s22, s5, 16 6341; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s9 6342; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s24 6343; GCN-NOHSA-VI-NEXT: s_and_b32 s5, s5, 0xffff 6344; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:144 6345; GCN-NOHSA-VI-NEXT: s_lshr_b32 s21, s3, 16 6346; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s7 6347; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s23 6348; GCN-NOHSA-VI-NEXT: s_and_b32 s3, s3, 0xffff 6349; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:112 6350; GCN-NOHSA-VI-NEXT: s_lshr_b32 s20, s1, 16 6351; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s5 6352; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s22 6353; GCN-NOHSA-VI-NEXT: s_and_b32 s1, s1, 0xffff 6354; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:80 6355; GCN-NOHSA-VI-NEXT: s_lshr_b32 s28, s14, 16 6356; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s3 6357; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s21 6358; GCN-NOHSA-VI-NEXT: s_and_b32 s14, s14, 0xffff 6359; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:48 6360; GCN-NOHSA-VI-NEXT: s_lshr_b32 s29, s12, 16 6361; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s1 6362; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s20 6363; GCN-NOHSA-VI-NEXT: s_and_b32 s12, s12, 0xffff 6364; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:16 6365; GCN-NOHSA-VI-NEXT: s_lshr_b32 s30, s10, 16 6366; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s14 6367; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s28 6368; GCN-NOHSA-VI-NEXT: s_and_b32 s10, s10, 0xffff 6369; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:224 6370; GCN-NOHSA-VI-NEXT: s_lshr_b32 s31, s8, 16 6371; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s12 6372; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s29 6373; GCN-NOHSA-VI-NEXT: s_and_b32 s8, s8, 0xffff 6374; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:192 6375; GCN-NOHSA-VI-NEXT: s_lshr_b32 s33, s6, 16 6376; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 
v0, s10 6377; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s30 6378; GCN-NOHSA-VI-NEXT: s_and_b32 s6, s6, 0xffff 6379; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:160 6380; GCN-NOHSA-VI-NEXT: s_lshr_b32 s34, s4, 16 6381; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8 6382; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s31 6383; GCN-NOHSA-VI-NEXT: s_and_b32 s4, s4, 0xffff 6384; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:128 6385; GCN-NOHSA-VI-NEXT: s_lshr_b32 s35, s2, 16 6386; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 6387; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s33 6388; GCN-NOHSA-VI-NEXT: s_and_b32 s2, s2, 0xffff 6389; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:96 6390; GCN-NOHSA-VI-NEXT: s_lshr_b32 s36, s0, 16 6391; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 6392; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s34 6393; GCN-NOHSA-VI-NEXT: s_and_b32 s0, s0, 0xffff 6394; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:64 6395; GCN-NOHSA-VI-NEXT: s_nop 0 6396; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s2 6397; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s35 6398; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:32 6399; GCN-NOHSA-VI-NEXT: s_nop 0 6400; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s0 6401; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s36 6402; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 6403; GCN-NOHSA-VI-NEXT: s_endpgm 6404; 6405; EG-LABEL: constant_zextload_v32i16_to_v32i64: 6406; EG: ; %bb.0: 6407; EG-NEXT: ALU 0, @30, KC0[CB0:0-32], KC1[] 6408; EG-NEXT: TEX 2 @22 6409; EG-NEXT: ALU 33, @31, KC0[], KC1[] 6410; EG-NEXT: TEX 0 @28 6411; EG-NEXT: ALU 92, @65, KC0[CB0:0-32], KC1[] 6412; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T50.X, 0 6413; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T49.X, 0 6414; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T48.X, 0 6415; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T47.X, 0 6416; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW 
T26.XYZW, T46.X, 0 6417; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T45.X, 0 6418; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T44.X, 0 6419; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T43.X, 0 6420; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T42.X, 0 6421; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T41.X, 0 6422; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T40.X, 0 6423; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T39.X, 0 6424; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T38.X, 0 6425; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T37.X, 0 6426; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T34.XYZW, T36.X, 0 6427; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T35.X, 1 6428; EG-NEXT: CF_END 6429; EG-NEXT: Fetch clause starting at 22: 6430; EG-NEXT: VTX_READ_128 T20.XYZW, T19.X, 48, #1 6431; EG-NEXT: VTX_READ_128 T21.XYZW, T19.X, 16, #1 6432; EG-NEXT: VTX_READ_128 T22.XYZW, T19.X, 32, #1 6433; EG-NEXT: Fetch clause starting at 28: 6434; EG-NEXT: VTX_READ_128 T29.XYZW, T19.X, 0, #1 6435; EG-NEXT: ALU clause starting at 30: 6436; EG-NEXT: MOV * T19.X, KC0[2].Z, 6437; EG-NEXT: ALU clause starting at 31: 6438; EG-NEXT: LSHR * T23.Z, T20.W, literal.x, 6439; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6440; EG-NEXT: AND_INT T23.X, T20.W, literal.x, 6441; EG-NEXT: MOV T23.Y, 0.0, 6442; EG-NEXT: LSHR T24.Z, T20.Z, literal.y, 6443; EG-NEXT: AND_INT * T24.X, T20.Z, literal.x, 6444; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6445; EG-NEXT: MOV T24.Y, 0.0, 6446; EG-NEXT: LSHR * T25.Z, T20.Y, literal.x, 6447; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6448; EG-NEXT: AND_INT T25.X, T20.Y, literal.x, 6449; EG-NEXT: MOV T25.Y, 0.0, 6450; EG-NEXT: LSHR T20.Z, T20.X, literal.y, 6451; EG-NEXT: AND_INT * T20.X, T20.X, literal.x, 6452; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6453; EG-NEXT: MOV T20.Y, 0.0, 6454; EG-NEXT: LSHR * T26.Z, T22.W, literal.x, 6455; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6456; EG-NEXT: AND_INT T26.X, T22.W, literal.x, 6457; 
EG-NEXT: MOV T26.Y, 0.0, 6458; EG-NEXT: LSHR T27.Z, T22.Z, literal.y, 6459; EG-NEXT: AND_INT * T27.X, T22.Z, literal.x, 6460; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6461; EG-NEXT: MOV T27.Y, 0.0, 6462; EG-NEXT: LSHR * T28.Z, T22.Y, literal.x, 6463; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6464; EG-NEXT: AND_INT T28.X, T22.Y, literal.x, 6465; EG-NEXT: MOV T28.Y, 0.0, 6466; EG-NEXT: LSHR T22.Z, T22.X, literal.y, 6467; EG-NEXT: AND_INT * T22.X, T22.X, literal.x, 6468; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6469; EG-NEXT: MOV T22.Y, 0.0, 6470; EG-NEXT: LSHR * T19.Z, T21.W, literal.x, 6471; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6472; EG-NEXT: ALU clause starting at 65: 6473; EG-NEXT: AND_INT T19.X, T21.W, literal.x, 6474; EG-NEXT: MOV T19.Y, 0.0, 6475; EG-NEXT: LSHR T30.Z, T21.Z, literal.y, 6476; EG-NEXT: AND_INT * T30.X, T21.Z, literal.x, 6477; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6478; EG-NEXT: MOV T30.Y, 0.0, 6479; EG-NEXT: LSHR * T31.Z, T21.Y, literal.x, 6480; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6481; EG-NEXT: AND_INT T31.X, T21.Y, literal.x, 6482; EG-NEXT: MOV T31.Y, 0.0, 6483; EG-NEXT: LSHR T21.Z, T21.X, literal.y, 6484; EG-NEXT: AND_INT * T21.X, T21.X, literal.x, 6485; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6486; EG-NEXT: MOV T21.Y, 0.0, 6487; EG-NEXT: LSHR * T32.Z, T29.W, literal.x, 6488; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6489; EG-NEXT: AND_INT T32.X, T29.W, literal.x, 6490; EG-NEXT: MOV T32.Y, 0.0, 6491; EG-NEXT: LSHR T33.Z, T29.Z, literal.y, 6492; EG-NEXT: AND_INT * T33.X, T29.Z, literal.x, 6493; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6494; EG-NEXT: MOV T33.Y, 0.0, 6495; EG-NEXT: LSHR * T34.Z, T29.Y, literal.x, 6496; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6497; EG-NEXT: AND_INT T34.X, T29.Y, literal.x, 6498; EG-NEXT: MOV T34.Y, 0.0, 6499; EG-NEXT: LSHR T29.Z, T29.X, literal.y, 6500; EG-NEXT: AND_INT * T29.X, T29.X, literal.x, 6501; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6502; EG-NEXT: 
MOV T29.Y, 0.0, 6503; EG-NEXT: MOV T23.W, 0.0, 6504; EG-NEXT: MOV * T24.W, 0.0, 6505; EG-NEXT: MOV T25.W, 0.0, 6506; EG-NEXT: MOV * T20.W, 0.0, 6507; EG-NEXT: MOV T26.W, 0.0, 6508; EG-NEXT: MOV * T27.W, 0.0, 6509; EG-NEXT: MOV T28.W, 0.0, 6510; EG-NEXT: MOV * T22.W, 0.0, 6511; EG-NEXT: MOV T19.W, 0.0, 6512; EG-NEXT: MOV * T30.W, 0.0, 6513; EG-NEXT: MOV T31.W, 0.0, 6514; EG-NEXT: MOV * T21.W, 0.0, 6515; EG-NEXT: MOV T32.W, 0.0, 6516; EG-NEXT: MOV * T33.W, 0.0, 6517; EG-NEXT: MOV T34.W, 0.0, 6518; EG-NEXT: MOV * T29.W, 0.0, 6519; EG-NEXT: LSHR T35.X, KC0[2].Y, literal.x, 6520; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6521; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6522; EG-NEXT: LSHR T36.X, PV.W, literal.x, 6523; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6524; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 6525; EG-NEXT: LSHR T37.X, PV.W, literal.x, 6526; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6527; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 6528; EG-NEXT: LSHR T38.X, PV.W, literal.x, 6529; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6530; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 6531; EG-NEXT: LSHR T39.X, PV.W, literal.x, 6532; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6533; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 6534; EG-NEXT: LSHR T40.X, PV.W, literal.x, 6535; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6536; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 6537; EG-NEXT: LSHR T41.X, PV.W, literal.x, 6538; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6539; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) 6540; EG-NEXT: LSHR T42.X, PV.W, literal.x, 6541; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6542; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43) 6543; EG-NEXT: LSHR T43.X, PV.W, literal.x, 6544; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6545; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43) 6546; EG-NEXT: LSHR T44.X, PV.W, literal.x, 6547; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6548; EG-NEXT: 2(2.802597e-45), 160(2.242078e-43) 6549; 
EG-NEXT: LSHR T45.X, PV.W, literal.x, 6550; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6551; EG-NEXT: 2(2.802597e-45), 176(2.466285e-43) 6552; EG-NEXT: LSHR T46.X, PV.W, literal.x, 6553; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6554; EG-NEXT: 2(2.802597e-45), 192(2.690493e-43) 6555; EG-NEXT: LSHR T47.X, PV.W, literal.x, 6556; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6557; EG-NEXT: 2(2.802597e-45), 208(2.914701e-43) 6558; EG-NEXT: LSHR T48.X, PV.W, literal.x, 6559; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6560; EG-NEXT: 2(2.802597e-45), 224(3.138909e-43) 6561; EG-NEXT: LSHR T49.X, PV.W, literal.x, 6562; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6563; EG-NEXT: 2(2.802597e-45), 240(3.363116e-43) 6564; EG-NEXT: LSHR * T50.X, PV.W, literal.x, 6565; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 6566 %load = load <32 x i16>, <32 x i16> addrspace(4)* %in 6567 %ext = zext <32 x i16> %load to <32 x i64> 6568 store <32 x i64> %ext, <32 x i64> addrspace(1)* %out 6569 ret void 6570} 6571 6572define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 { 6573; GCN-NOHSA-SI-LABEL: constant_sextload_v32i16_to_v32i64: 6574; GCN-NOHSA-SI: ; %bb.0: 6575; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[16:19], s[0:1], 0x9 6576; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 6577; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 6578; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 6579; GCN-NOHSA-SI-NEXT: s_mov_b32 s18, s15 6580; GCN-NOHSA-SI-NEXT: s_mov_b32 s20, s13 6581; GCN-NOHSA-SI-NEXT: s_mov_b32 s36, s11 6582; GCN-NOHSA-SI-NEXT: s_mov_b32 s40, s9 6583; GCN-NOHSA-SI-NEXT: s_mov_b32 s44, s7 6584; GCN-NOHSA-SI-NEXT: s_mov_b32 s46, s5 6585; GCN-NOHSA-SI-NEXT: s_mov_b32 s38, s3 6586; GCN-NOHSA-SI-NEXT: s_mov_b32 s42, s1 6587; GCN-NOHSA-SI-NEXT: s_lshr_b32 s24, s14, 16 6588; GCN-NOHSA-SI-NEXT: s_lshr_b32 s26, s12, 16 6589; GCN-NOHSA-SI-NEXT: s_lshr_b32 s28, s10, 16 6590; GCN-NOHSA-SI-NEXT: s_lshr_b32 s34, s8, 16 6591; 
GCN-NOHSA-SI-NEXT: s_bfe_i64 s[48:49], s[20:21], 0x100000 6592; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[50:51], s[18:19], 0x100000 6593; GCN-NOHSA-SI-NEXT: s_lshr_b32 s52, s6, 16 6594; GCN-NOHSA-SI-NEXT: s_lshr_b32 s54, s4, 16 6595; GCN-NOHSA-SI-NEXT: s_lshr_b32 s56, s2, 16 6596; GCN-NOHSA-SI-NEXT: s_lshr_b32 s58, s0, 16 6597; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[18:19], s[0:1], 0x100000 6598; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[20:21], s[2:3], 0x100000 6599; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[22:23], s[4:5], 0x100000 6600; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[30:31], s[6:7], 0x100000 6601; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[60:61], s[8:9], 0x100000 6602; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[62:63], s[10:11], 0x100000 6603; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[64:65], s[12:13], 0x100000 6604; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[66:67], s[14:15], 0x100000 6605; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[68:69], s[0:1], 48 6606; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[70:71], s[2:3], 48 6607; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[6:7], s[6:7], 48 6608; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[8:9], s[8:9], 48 6609; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[10:11], s[10:11], 48 6610; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[2:3], s[12:13], 48 6611; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[12:13], s[14:15], 48 6612; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[4:5], s[4:5], 48 6613; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s16 6614; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s17 6615; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s50 6616; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s51 6617; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s12 6618; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s13 6619; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s48 6620; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s49 6621; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s2 6622; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s3 6623; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 6624; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 6625; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[12:13], s[46:47], 0x100000 6626; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[14:15], s[44:45], 0x100000 6627; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[16:17], 
s[40:41], 0x100000 6628; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x100000 6629; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[40:41], s[42:43], 0x100000 6630; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x100000 6631; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s36 6632; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s37 6633; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s10 6634; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s11 6635; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s16 6636; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s17 6637; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s8 6638; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s9 6639; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v16, s14 6640; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, s15 6641; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v18, s6 6642; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v19, s7 6643; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v20, s12 6644; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v21, s13 6645; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v22, s4 6646; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v23, s5 6647; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240 6648; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[4:5], s[58:59], 0x100000 6649; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[6:7], s[56:57], 0x100000 6650; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[8:9], s[54:55], 0x100000 6651; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[10:11], s[52:53], 0x100000 6652; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[12:13], s[34:35], 0x100000 6653; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[14:15], s[28:29], 0x100000 6654; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[16:17], s[26:27], 0x100000 6655; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x100000 6656; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:208 6657; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:176 6658; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:144 6659; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:112 6660; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:80 6661; GCN-NOHSA-SI-NEXT: s_waitcnt 
expcnt(5) 6662; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s38 6663; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s39 6664; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s70 6665; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s71 6666; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 6667; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6668; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s40 6669; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s41 6670; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s68 6671; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s69 6672; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 6673; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6674; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s66 6675; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s67 6676; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s64 6677; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s65 6678; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s62 6679; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s63 6680; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s60 6681; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s61 6682; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v16, s30 6683; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, s31 6684; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v20, s22 6685; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v21, s23 6686; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v24, s20 6687; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v25, s21 6688; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s24 6689; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s25 6690; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224 6691; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6692; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s18 6693; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s19 6694; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s16 6695; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s17 6696; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:192 6697; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s14 6698; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s15 6699; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:160 6700; GCN-NOHSA-SI-NEXT: 
v_mov_b32_e32 v14, s12 6701; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s13 6702; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:128 6703; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v18, s10 6704; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v19, s11 6705; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:96 6706; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v22, s8 6707; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v23, s9 6708; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:64 6709; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v26, s6 6710; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v27, s7 6711; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:32 6712; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s4 6713; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s5 6714; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 6715; GCN-NOHSA-SI-NEXT: s_endpgm 6716; 6717; GCN-HSA-LABEL: constant_sextload_v32i16_to_v32i64: 6718; GCN-HSA: ; %bb.0: 6719; GCN-HSA-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x0 6720; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 6721; GCN-HSA-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 6722; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 6723; GCN-HSA-NEXT: s_mov_b32 s42, s15 6724; GCN-HSA-NEXT: s_mov_b32 s44, s13 6725; GCN-HSA-NEXT: s_mov_b32 s46, s11 6726; GCN-HSA-NEXT: s_mov_b32 s48, s9 6727; GCN-HSA-NEXT: s_mov_b32 s50, s7 6728; GCN-HSA-NEXT: s_mov_b32 s52, s5 6729; GCN-HSA-NEXT: s_mov_b32 s54, s3 6730; GCN-HSA-NEXT: s_mov_b32 s56, s1 6731; GCN-HSA-NEXT: s_lshr_b32 s58, s14, 16 6732; GCN-HSA-NEXT: s_lshr_b32 s60, s12, 16 6733; GCN-HSA-NEXT: s_lshr_b32 s62, s10, 16 6734; GCN-HSA-NEXT: s_lshr_b32 s64, s8, 16 6735; GCN-HSA-NEXT: s_lshr_b32 s66, s6, 16 6736; GCN-HSA-NEXT: s_lshr_b32 s68, s4, 16 6737; GCN-HSA-NEXT: s_lshr_b32 s70, s2, 16 6738; GCN-HSA-NEXT: s_lshr_b32 s72, s0, 16 6739; GCN-HSA-NEXT: s_bfe_i64 s[18:19], s[0:1], 0x100000 6740; GCN-HSA-NEXT: s_bfe_i64 s[20:21], s[2:3], 0x100000 6741; GCN-HSA-NEXT: s_ashr_i64 s[36:37], s[0:1], 48 6742; GCN-HSA-NEXT: 
s_ashr_i64 s[38:39], s[2:3], 48 6743; GCN-HSA-NEXT: s_ashr_i64 s[0:1], s[14:15], 48 6744; GCN-HSA-NEXT: s_bfe_i64 s[2:3], s[42:43], 0x100000 6745; GCN-HSA-NEXT: s_bfe_i64 s[22:23], s[4:5], 0x100000 6746; GCN-HSA-NEXT: s_bfe_i64 s[24:25], s[6:7], 0x100000 6747; GCN-HSA-NEXT: s_bfe_i64 s[26:27], s[8:9], 0x100000 6748; GCN-HSA-NEXT: s_bfe_i64 s[28:29], s[10:11], 0x100000 6749; GCN-HSA-NEXT: s_bfe_i64 s[30:31], s[12:13], 0x100000 6750; GCN-HSA-NEXT: s_bfe_i64 s[34:35], s[14:15], 0x100000 6751; GCN-HSA-NEXT: s_ashr_i64 s[40:41], s[4:5], 48 6752; GCN-HSA-NEXT: s_ashr_i64 s[74:75], s[6:7], 48 6753; GCN-HSA-NEXT: s_ashr_i64 s[76:77], s[8:9], 48 6754; GCN-HSA-NEXT: s_ashr_i64 s[78:79], s[10:11], 48 6755; GCN-HSA-NEXT: s_ashr_i64 s[80:81], s[12:13], 48 6756; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 6757; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 6758; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0 6759; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1 6760; GCN-HSA-NEXT: s_bfe_i64 s[0:1], s[72:73], 0x100000 6761; GCN-HSA-NEXT: s_bfe_i64 s[2:3], s[70:71], 0x100000 6762; GCN-HSA-NEXT: s_bfe_i64 s[4:5], s[68:69], 0x100000 6763; GCN-HSA-NEXT: s_bfe_i64 s[6:7], s[66:67], 0x100000 6764; GCN-HSA-NEXT: s_bfe_i64 s[8:9], s[64:65], 0x100000 6765; GCN-HSA-NEXT: s_bfe_i64 s[10:11], s[62:63], 0x100000 6766; GCN-HSA-NEXT: s_bfe_i64 s[12:13], s[60:61], 0x100000 6767; GCN-HSA-NEXT: s_bfe_i64 s[14:15], s[58:59], 0x100000 6768; GCN-HSA-NEXT: s_bfe_i64 s[42:43], s[56:57], 0x100000 6769; GCN-HSA-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x100000 6770; GCN-HSA-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x100000 6771; GCN-HSA-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x100000 6772; GCN-HSA-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x100000 6773; GCN-HSA-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x100000 6774; GCN-HSA-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x100000 6775; GCN-HSA-NEXT: s_add_u32 s56, s16, 0xf0 6776; GCN-HSA-NEXT: s_addc_u32 s57, s17, 0 6777; GCN-HSA-NEXT: v_mov_b32_e32 v4, s44 6778; GCN-HSA-NEXT: s_add_u32 s44, s16, 0xd0 6779; GCN-HSA-NEXT: 
v_mov_b32_e32 v5, s45 6780; GCN-HSA-NEXT: s_addc_u32 s45, s17, 0 6781; GCN-HSA-NEXT: v_mov_b32_e32 v24, s44 6782; GCN-HSA-NEXT: v_mov_b32_e32 v25, s45 6783; GCN-HSA-NEXT: s_add_u32 s44, s16, 0xb0 6784; GCN-HSA-NEXT: s_addc_u32 s45, s17, 0 6785; GCN-HSA-NEXT: v_mov_b32_e32 v26, s44 6786; GCN-HSA-NEXT: v_mov_b32_e32 v27, s45 6787; GCN-HSA-NEXT: s_add_u32 s44, s16, 0x90 6788; GCN-HSA-NEXT: s_addc_u32 s45, s17, 0 6789; GCN-HSA-NEXT: v_mov_b32_e32 v28, s44 6790; GCN-HSA-NEXT: v_mov_b32_e32 v22, s56 6791; GCN-HSA-NEXT: v_mov_b32_e32 v29, s45 6792; GCN-HSA-NEXT: s_add_u32 s44, s16, 0x70 6793; GCN-HSA-NEXT: v_mov_b32_e32 v23, s57 6794; GCN-HSA-NEXT: s_addc_u32 s45, s17, 0 6795; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[0:3] 6796; GCN-HSA-NEXT: v_mov_b32_e32 v22, s40 6797; GCN-HSA-NEXT: s_add_u32 s40, s16, 0x50 6798; GCN-HSA-NEXT: v_mov_b32_e32 v23, s41 6799; GCN-HSA-NEXT: s_addc_u32 s41, s17, 0 6800; GCN-HSA-NEXT: v_mov_b32_e32 v6, s80 6801; GCN-HSA-NEXT: v_mov_b32_e32 v7, s81 6802; GCN-HSA-NEXT: v_mov_b32_e32 v2, s38 6803; GCN-HSA-NEXT: s_add_u32 s38, s16, 48 6804; GCN-HSA-NEXT: v_mov_b32_e32 v3, s39 6805; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[4:7] 6806; GCN-HSA-NEXT: s_addc_u32 s39, s17, 0 6807; GCN-HSA-NEXT: v_mov_b32_e32 v24, s38 6808; GCN-HSA-NEXT: v_mov_b32_e32 v25, s39 6809; GCN-HSA-NEXT: s_add_u32 s38, s16, 16 6810; GCN-HSA-NEXT: v_mov_b32_e32 v8, s46 6811; GCN-HSA-NEXT: v_mov_b32_e32 v9, s47 6812; GCN-HSA-NEXT: v_mov_b32_e32 v10, s78 6813; GCN-HSA-NEXT: v_mov_b32_e32 v11, s79 6814; GCN-HSA-NEXT: s_addc_u32 s39, s17, 0 6815; GCN-HSA-NEXT: flat_store_dwordx4 v[26:27], v[8:11] 6816; GCN-HSA-NEXT: v_mov_b32_e32 v12, s48 6817; GCN-HSA-NEXT: v_mov_b32_e32 v10, s14 6818; GCN-HSA-NEXT: s_add_u32 s14, s16, 0xe0 6819; GCN-HSA-NEXT: v_mov_b32_e32 v13, s49 6820; GCN-HSA-NEXT: v_mov_b32_e32 v14, s76 6821; GCN-HSA-NEXT: v_mov_b32_e32 v15, s77 6822; GCN-HSA-NEXT: v_mov_b32_e32 v11, s15 6823; GCN-HSA-NEXT: s_addc_u32 s15, s17, 0 6824; GCN-HSA-NEXT: v_mov_b32_e32 v30, 
s44 6825; GCN-HSA-NEXT: flat_store_dwordx4 v[28:29], v[12:15] 6826; GCN-HSA-NEXT: v_mov_b32_e32 v16, s50 6827; GCN-HSA-NEXT: v_mov_b32_e32 v14, s12 6828; GCN-HSA-NEXT: s_add_u32 s12, s16, 0xc0 6829; GCN-HSA-NEXT: v_mov_b32_e32 v17, s51 6830; GCN-HSA-NEXT: v_mov_b32_e32 v18, s74 6831; GCN-HSA-NEXT: v_mov_b32_e32 v19, s75 6832; GCN-HSA-NEXT: v_mov_b32_e32 v31, s45 6833; GCN-HSA-NEXT: v_mov_b32_e32 v32, s40 6834; GCN-HSA-NEXT: v_mov_b32_e32 v15, s13 6835; GCN-HSA-NEXT: s_addc_u32 s13, s17, 0 6836; GCN-HSA-NEXT: v_mov_b32_e32 v20, s52 6837; GCN-HSA-NEXT: v_mov_b32_e32 v21, s53 6838; GCN-HSA-NEXT: v_mov_b32_e32 v0, s54 6839; GCN-HSA-NEXT: v_mov_b32_e32 v1, s55 6840; GCN-HSA-NEXT: v_mov_b32_e32 v33, s41 6841; GCN-HSA-NEXT: v_mov_b32_e32 v34, s38 6842; GCN-HSA-NEXT: flat_store_dwordx4 v[30:31], v[16:19] 6843; GCN-HSA-NEXT: v_mov_b32_e32 v4, s42 6844; GCN-HSA-NEXT: v_mov_b32_e32 v17, s15 6845; GCN-HSA-NEXT: v_mov_b32_e32 v19, s13 6846; GCN-HSA-NEXT: v_mov_b32_e32 v5, s43 6847; GCN-HSA-NEXT: v_mov_b32_e32 v35, s39 6848; GCN-HSA-NEXT: v_mov_b32_e32 v6, s36 6849; GCN-HSA-NEXT: v_mov_b32_e32 v7, s37 6850; GCN-HSA-NEXT: v_mov_b32_e32 v8, s34 6851; GCN-HSA-NEXT: v_mov_b32_e32 v9, s35 6852; GCN-HSA-NEXT: v_mov_b32_e32 v12, s30 6853; GCN-HSA-NEXT: v_mov_b32_e32 v13, s31 6854; GCN-HSA-NEXT: v_mov_b32_e32 v16, s14 6855; GCN-HSA-NEXT: flat_store_dwordx4 v[32:33], v[20:23] 6856; GCN-HSA-NEXT: v_mov_b32_e32 v18, s12 6857; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[0:3] 6858; GCN-HSA-NEXT: flat_store_dwordx4 v[34:35], v[4:7] 6859; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[8:11] 6860; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[12:15] 6861; GCN-HSA-NEXT: v_mov_b32_e32 v2, s10 6862; GCN-HSA-NEXT: s_add_u32 s10, s16, 0xa0 6863; GCN-HSA-NEXT: v_mov_b32_e32 v3, s11 6864; GCN-HSA-NEXT: s_addc_u32 s11, s17, 0 6865; GCN-HSA-NEXT: v_mov_b32_e32 v4, s10 6866; GCN-HSA-NEXT: v_mov_b32_e32 v0, s28 6867; GCN-HSA-NEXT: v_mov_b32_e32 v1, s29 6868; GCN-HSA-NEXT: v_mov_b32_e32 v5, s11 6869; 
GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6870; GCN-HSA-NEXT: s_nop 0 6871; GCN-HSA-NEXT: v_mov_b32_e32 v2, s8 6872; GCN-HSA-NEXT: s_add_u32 s8, s16, 0x80 6873; GCN-HSA-NEXT: v_mov_b32_e32 v3, s9 6874; GCN-HSA-NEXT: s_addc_u32 s9, s17, 0 6875; GCN-HSA-NEXT: v_mov_b32_e32 v4, s8 6876; GCN-HSA-NEXT: v_mov_b32_e32 v0, s26 6877; GCN-HSA-NEXT: v_mov_b32_e32 v1, s27 6878; GCN-HSA-NEXT: v_mov_b32_e32 v5, s9 6879; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6880; GCN-HSA-NEXT: s_nop 0 6881; GCN-HSA-NEXT: v_mov_b32_e32 v2, s6 6882; GCN-HSA-NEXT: s_add_u32 s6, s16, 0x60 6883; GCN-HSA-NEXT: v_mov_b32_e32 v3, s7 6884; GCN-HSA-NEXT: s_addc_u32 s7, s17, 0 6885; GCN-HSA-NEXT: v_mov_b32_e32 v4, s6 6886; GCN-HSA-NEXT: v_mov_b32_e32 v0, s24 6887; GCN-HSA-NEXT: v_mov_b32_e32 v1, s25 6888; GCN-HSA-NEXT: v_mov_b32_e32 v5, s7 6889; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6890; GCN-HSA-NEXT: s_nop 0 6891; GCN-HSA-NEXT: v_mov_b32_e32 v2, s4 6892; GCN-HSA-NEXT: s_add_u32 s4, s16, 64 6893; GCN-HSA-NEXT: v_mov_b32_e32 v3, s5 6894; GCN-HSA-NEXT: s_addc_u32 s5, s17, 0 6895; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 6896; GCN-HSA-NEXT: v_mov_b32_e32 v0, s22 6897; GCN-HSA-NEXT: v_mov_b32_e32 v1, s23 6898; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 6899; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6900; GCN-HSA-NEXT: s_nop 0 6901; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 6902; GCN-HSA-NEXT: s_add_u32 s2, s16, 32 6903; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 6904; GCN-HSA-NEXT: s_addc_u32 s3, s17, 0 6905; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 6906; GCN-HSA-NEXT: v_mov_b32_e32 v0, s20 6907; GCN-HSA-NEXT: v_mov_b32_e32 v1, s21 6908; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 6909; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6910; GCN-HSA-NEXT: v_mov_b32_e32 v4, s16 6911; GCN-HSA-NEXT: v_mov_b32_e32 v0, s18 6912; GCN-HSA-NEXT: v_mov_b32_e32 v1, s19 6913; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0 6914; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1 6915; GCN-HSA-NEXT: v_mov_b32_e32 v5, s17 6916; GCN-HSA-NEXT: flat_store_dwordx4 
v[4:5], v[0:3] 6917; GCN-HSA-NEXT: s_endpgm 6918; 6919; GCN-NOHSA-VI-LABEL: constant_sextload_v32i16_to_v32i64: 6920; GCN-NOHSA-VI: ; %bb.0: 6921; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[16:19], s[0:1], 0x24 6922; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 6923; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 6924; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 6925; GCN-NOHSA-VI-NEXT: s_mov_b32 s36, s15 6926; GCN-NOHSA-VI-NEXT: s_mov_b32 s38, s13 6927; GCN-NOHSA-VI-NEXT: s_ashr_i64 s[82:83], s[14:15], 48 6928; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x100000 6929; GCN-NOHSA-VI-NEXT: s_mov_b32 s40, s11 6930; GCN-NOHSA-VI-NEXT: s_mov_b32 s48, s3 6931; GCN-NOHSA-VI-NEXT: s_mov_b32 s50, s1 6932; GCN-NOHSA-VI-NEXT: s_lshr_b32 s64, s2, 16 6933; GCN-NOHSA-VI-NEXT: s_lshr_b32 s66, s0, 16 6934; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[18:19], s[0:1], 0x100000 6935; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[20:21], s[2:3], 0x100000 6936; GCN-NOHSA-VI-NEXT: s_ashr_i64 s[68:69], s[0:1], 48 6937; GCN-NOHSA-VI-NEXT: s_ashr_i64 s[70:71], s[2:3], 48 6938; GCN-NOHSA-VI-NEXT: s_ashr_i64 s[80:81], s[12:13], 48 6939; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 6940; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 6941; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s16 6942; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s17 6943; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x100000 6944; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s36 6945; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s37 6946; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s82 6947; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s83 6948; GCN-NOHSA-VI-NEXT: s_mov_b32 s42, s9 6949; GCN-NOHSA-VI-NEXT: s_ashr_i64 s[78:79], s[10:11], 48 6950; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[40:41], s[40:41], 0x100000 6951; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240 6952; GCN-NOHSA-VI-NEXT: s_mov_b32 s44, s7 6953; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s38 6954; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s39 6955; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s80 6956; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s81 
6957; GCN-NOHSA-VI-NEXT: s_ashr_i64 s[76:77], s[8:9], 48 6958; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[42:43], s[42:43], 0x100000 6959; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208 6960; GCN-NOHSA-VI-NEXT: s_mov_b32 s46, s5 6961; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s40 6962; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s41 6963; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s78 6964; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s79 6965; GCN-NOHSA-VI-NEXT: s_ashr_i64 s[74:75], s[6:7], 48 6966; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x100000 6967; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176 6968; GCN-NOHSA-VI-NEXT: s_ashr_i64 s[72:73], s[4:5], 48 6969; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s42 6970; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s43 6971; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s76 6972; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s77 6973; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x100000 6974; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144 6975; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x100000 6976; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s44 6977; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s45 6978; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s74 6979; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s75 6980; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 6981; GCN-NOHSA-VI-NEXT: s_lshr_b32 s52, s14, 16 6982; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s46 6983; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s47 6984; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s72 6985; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s73 6986; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x100000 6987; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 6988; GCN-NOHSA-VI-NEXT: s_lshr_b32 s54, s12, 16 6989; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s48 6990; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s49 6991; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s70 6992; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s71 6993; GCN-NOHSA-VI-NEXT: 
s_bfe_i64 s[34:35], s[14:15], 0x100000 6994; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x100000 6995; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 6996; GCN-NOHSA-VI-NEXT: s_lshr_b32 s56, s10, 16 6997; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s50 6998; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s51 6999; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s68 7000; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s69 7001; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[30:31], s[12:13], 0x100000 7002; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[16:17], s[54:55], 0x100000 7003; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 7004; GCN-NOHSA-VI-NEXT: s_lshr_b32 s58, s8, 16 7005; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s34 7006; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s35 7007; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s52 7008; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s53 7009; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[28:29], s[10:11], 0x100000 7010; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[14:15], s[56:57], 0x100000 7011; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224 7012; GCN-NOHSA-VI-NEXT: s_lshr_b32 s60, s6, 16 7013; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s30 7014; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s31 7015; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s16 7016; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s17 7017; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[26:27], s[8:9], 0x100000 7018; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[12:13], s[58:59], 0x100000 7019; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192 7020; GCN-NOHSA-VI-NEXT: s_lshr_b32 s62, s4, 16 7021; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s28 7022; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s29 7023; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s14 7024; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s15 7025; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[24:25], s[6:7], 0x100000 7026; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[10:11], s[60:61], 0x100000 7027; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160 7028; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[22:23], 
s[4:5], 0x100000 7029; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s26 7030; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s27 7031; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s12 7032; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s13 7033; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[8:9], s[62:63], 0x100000 7034; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128 7035; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[6:7], s[64:65], 0x100000 7036; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s24 7037; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s25 7038; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s10 7039; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s11 7040; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 7041; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[4:5], s[66:67], 0x100000 7042; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s22 7043; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s23 7044; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s8 7045; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s9 7046; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 7047; GCN-NOHSA-VI-NEXT: s_nop 0 7048; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s20 7049; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s21 7050; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s6 7051; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s7 7052; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 7053; GCN-NOHSA-VI-NEXT: s_nop 0 7054; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s18 7055; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s19 7056; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4 7057; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s5 7058; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 7059; GCN-NOHSA-VI-NEXT: s_endpgm 7060; 7061; EG-LABEL: constant_sextload_v32i16_to_v32i64: 7062; EG: ; %bb.0: 7063; EG-NEXT: ALU 0, @30, KC0[CB0:0-32], KC1[] 7064; EG-NEXT: TEX 0 @22 7065; EG-NEXT: ALU 55, @31, KC0[CB0:0-32], KC1[] 7066; EG-NEXT: TEX 2 @24 7067; EG-NEXT: ALU 74, @87, KC0[CB0:0-32], KC1[] 7068; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T50.XYZW, T38.X, 0 7069; EG-NEXT: MEM_RAT_CACHELESS 
STORE_RAW T49.XYZW, T36.X, 0 7070; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T34.X, 0 7071; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T48.XYZW, T33.X, 0 7072; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T32.X, 0 7073; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T31.X, 0 7074; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T30.X, 0 7075; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T29.X, 0 7076; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T28.X, 0 7077; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T27.X, 0 7078; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T26.X, 0 7079; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T25.X, 0 7080; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T41.XYZW, T24.X, 0 7081; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T23.X, 0 7082; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T22.X, 0 7083; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T21.X, 1 7084; EG-NEXT: CF_END 7085; EG-NEXT: Fetch clause starting at 22: 7086; EG-NEXT: VTX_READ_128 T20.XYZW, T19.X, 0, #1 7087; EG-NEXT: Fetch clause starting at 24: 7088; EG-NEXT: VTX_READ_128 T38.XYZW, T19.X, 48, #1 7089; EG-NEXT: VTX_READ_128 T39.XYZW, T19.X, 32, #1 7090; EG-NEXT: VTX_READ_128 T40.XYZW, T19.X, 16, #1 7091; EG-NEXT: ALU clause starting at 30: 7092; EG-NEXT: MOV * T19.X, KC0[2].Z, 7093; EG-NEXT: ALU clause starting at 31: 7094; EG-NEXT: LSHR T21.X, KC0[2].Y, literal.x, 7095; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7096; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 7097; EG-NEXT: LSHR T22.X, PV.W, literal.x, 7098; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7099; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 7100; EG-NEXT: LSHR T23.X, PV.W, literal.x, 7101; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7102; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 7103; EG-NEXT: LSHR T24.X, PV.W, literal.x, 7104; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7105; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 7106; EG-NEXT: LSHR T25.X, PV.W, literal.x, 7107; EG-NEXT: ADD_INT * T0.W, 
KC0[2].Y, literal.y, 7108; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 7109; EG-NEXT: LSHR T26.X, PV.W, literal.x, 7110; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7111; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 7112; EG-NEXT: LSHR T27.X, PV.W, literal.x, 7113; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7114; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) 7115; EG-NEXT: LSHR T28.X, PV.W, literal.x, 7116; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7117; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43) 7118; EG-NEXT: LSHR T29.X, PV.W, literal.x, 7119; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7120; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43) 7121; EG-NEXT: LSHR T30.X, PV.W, literal.x, 7122; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7123; EG-NEXT: 2(2.802597e-45), 160(2.242078e-43) 7124; EG-NEXT: LSHR T31.X, PV.W, literal.x, 7125; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7126; EG-NEXT: 2(2.802597e-45), 176(2.466285e-43) 7127; EG-NEXT: LSHR T32.X, PV.W, literal.x, 7128; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7129; EG-NEXT: 2(2.802597e-45), 192(2.690493e-43) 7130; EG-NEXT: LSHR T33.X, PV.W, literal.x, 7131; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7132; EG-NEXT: 2(2.802597e-45), 208(2.914701e-43) 7133; EG-NEXT: LSHR T34.X, PV.W, literal.x, 7134; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.y, 7135; EG-NEXT: ASHR * T35.W, T20.X, literal.z, 7136; EG-NEXT: 2(2.802597e-45), 224(3.138909e-43) 7137; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 7138; EG-NEXT: LSHR T36.X, PV.W, literal.x, 7139; EG-NEXT: ASHR T35.Z, T20.X, literal.y, 7140; EG-NEXT: ASHR * T37.W, T20.Y, literal.z, 7141; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 7142; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 7143; EG-NEXT: BFE_INT T35.X, T20.X, 0.0, literal.x, 7144; EG-NEXT: ASHR * T37.Z, T20.Y, literal.x, 7145; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7146; EG-NEXT: BFE_INT T37.X, T20.Y, 0.0, literal.x, 7147; EG-NEXT: ASHR T35.Y, PV.X, literal.y, 7148; EG-NEXT: ASHR * T19.W, T20.Z, 
literal.y, 7149; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7150; EG-NEXT: ALU clause starting at 87: 7151; EG-NEXT: ASHR T19.Z, T20.Z, literal.x, 7152; EG-NEXT: ASHR * T41.W, T20.W, literal.y, 7153; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7154; EG-NEXT: BFE_INT T19.X, T20.Z, 0.0, literal.x, 7155; EG-NEXT: ASHR T37.Y, T37.X, literal.y, 7156; EG-NEXT: ASHR T41.Z, T20.W, literal.x, 7157; EG-NEXT: ASHR * T42.W, T40.X, literal.y, BS:VEC_120/SCL_212 7158; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7159; EG-NEXT: BFE_INT T41.X, T20.W, 0.0, literal.x, 7160; EG-NEXT: ASHR T19.Y, PV.X, literal.y, 7161; EG-NEXT: ASHR T42.Z, T40.X, literal.x, 7162; EG-NEXT: ASHR * T20.W, T40.Y, literal.y, 7163; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7164; EG-NEXT: BFE_INT T42.X, T40.X, 0.0, literal.x, 7165; EG-NEXT: ASHR T41.Y, PV.X, literal.y, 7166; EG-NEXT: ASHR T20.Z, T40.Y, literal.x, 7167; EG-NEXT: ASHR * T43.W, T40.Z, literal.y, 7168; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7169; EG-NEXT: BFE_INT T20.X, T40.Y, 0.0, literal.x, 7170; EG-NEXT: ASHR T42.Y, PV.X, literal.y, 7171; EG-NEXT: ASHR T43.Z, T40.Z, literal.x, 7172; EG-NEXT: ASHR * T44.W, T40.W, literal.y, 7173; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7174; EG-NEXT: BFE_INT T43.X, T40.Z, 0.0, literal.x, 7175; EG-NEXT: ASHR T20.Y, PV.X, literal.y, 7176; EG-NEXT: ASHR T44.Z, T40.W, literal.x, 7177; EG-NEXT: ASHR * T45.W, T39.X, literal.y, 7178; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7179; EG-NEXT: BFE_INT T44.X, T40.W, 0.0, literal.x, 7180; EG-NEXT: ASHR T43.Y, PV.X, literal.y, 7181; EG-NEXT: ASHR T45.Z, T39.X, literal.x, 7182; EG-NEXT: ASHR * T40.W, T39.Y, literal.y, 7183; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7184; EG-NEXT: BFE_INT T45.X, T39.X, 0.0, literal.x, 7185; EG-NEXT: ASHR T44.Y, PV.X, literal.y, 7186; EG-NEXT: ASHR T40.Z, T39.Y, literal.x, 7187; EG-NEXT: ASHR * T46.W, T39.Z, literal.y, 7188; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7189; EG-NEXT: BFE_INT T40.X, T39.Y, 0.0, literal.x, 7190; 
EG-NEXT: ASHR T45.Y, PV.X, literal.y, 7191; EG-NEXT: ASHR T46.Z, T39.Z, literal.x, 7192; EG-NEXT: ASHR * T47.W, T39.W, literal.y, 7193; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7194; EG-NEXT: BFE_INT T46.X, T39.Z, 0.0, literal.x, 7195; EG-NEXT: ASHR T40.Y, PV.X, literal.y, 7196; EG-NEXT: ASHR T47.Z, T39.W, literal.x, 7197; EG-NEXT: ASHR * T48.W, T38.X, literal.y, 7198; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7199; EG-NEXT: BFE_INT T47.X, T39.W, 0.0, literal.x, 7200; EG-NEXT: ASHR T46.Y, PV.X, literal.y, 7201; EG-NEXT: ASHR T48.Z, T38.X, literal.x, 7202; EG-NEXT: ASHR * T39.W, T38.Y, literal.y, 7203; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7204; EG-NEXT: BFE_INT T48.X, T38.X, 0.0, literal.x, 7205; EG-NEXT: ASHR T47.Y, PV.X, literal.y, 7206; EG-NEXT: ASHR T39.Z, T38.Y, literal.x, 7207; EG-NEXT: ASHR * T49.W, T38.Z, literal.y, 7208; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7209; EG-NEXT: BFE_INT T39.X, T38.Y, 0.0, literal.x, 7210; EG-NEXT: ASHR T48.Y, PV.X, literal.y, 7211; EG-NEXT: ASHR T49.Z, T38.Z, literal.x, 7212; EG-NEXT: ASHR * T50.W, T38.W, literal.y, 7213; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7214; EG-NEXT: BFE_INT T49.X, T38.Z, 0.0, literal.x, 7215; EG-NEXT: ASHR T39.Y, PV.X, literal.y, 7216; EG-NEXT: ASHR * T50.Z, T38.W, literal.x, 7217; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7218; EG-NEXT: BFE_INT T50.X, T38.W, 0.0, literal.x, 7219; EG-NEXT: ASHR T49.Y, PV.X, literal.y, 7220; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 7221; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7222; EG-NEXT: 240(3.363116e-43), 0(0.000000e+00) 7223; EG-NEXT: LSHR T38.X, PV.W, literal.x, 7224; EG-NEXT: ASHR * T50.Y, PV.X, literal.y, 7225; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) 7226 %load = load <32 x i16>, <32 x i16> addrspace(4)* %in 7227 %ext = sext <32 x i16> %load to <32 x i64> 7228 store <32 x i64> %ext, <32 x i64> addrspace(1)* %out 7229 ret void 7230} 7231 7232; The <64 x i16> to <64 x i64> extending-load tests below are commented out because they trigger undefined register machine verifier errors. 7233 7234; define
amdgpu_kernel void @constant_zextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 { 7235; %load = load <64 x i16>, <64 x i16> addrspace(4)* %in 7236; %ext = zext <64 x i16> %load to <64 x i64> 7237; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out 7238; ret void 7239; } 7240 7241; define amdgpu_kernel void @constant_sextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 { 7242; %load = load <64 x i16>, <64 x i16> addrspace(4)* %in 7243; %ext = sext <64 x i16> %load to <64 x i64> 7244; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out 7245; ret void 7246; } 7247 7248attributes #0 = { nounwind } 7249