1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefix=GCN-NOHSA-SI %s 3; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck --check-prefix=GCN-HSA %s 4; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefix=GCN-NOHSA-VI %s 5; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck --check-prefix=EG %s 6 7define amdgpu_kernel void @constant_load_i16(i16 addrspace(1)* %out, i16 addrspace(4)* %in) { 8; GCN-NOHSA-SI-LABEL: constant_load_i16: 9; GCN-NOHSA-SI: ; %bb.0: ; %entry 10; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 11; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 12; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 13; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 14; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 15; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 16; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 17; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 18; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 19; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 20; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 21; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 22; GCN-NOHSA-SI-NEXT: buffer_store_short v0, off, s[4:7], 0 23; GCN-NOHSA-SI-NEXT: s_endpgm 24; 25; GCN-HSA-LABEL: constant_load_i16: 26; GCN-HSA: ; %bb.0: ; %entry 27; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 28; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 29; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 30; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 31; GCN-HSA-NEXT: flat_load_ushort v2, v[0:1] 32; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 33; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 34; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 35; GCN-HSA-NEXT: flat_store_short v[0:1], v2 36; GCN-HSA-NEXT: s_endpgm 37; 38; GCN-NOHSA-VI-LABEL: constant_load_i16: 39; GCN-NOHSA-VI: ; %bb.0: ; %entry 40; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 41; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 42; 
GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 43; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 44; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 45; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 46; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 47; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 48; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 49; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 50; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 51; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 52; GCN-NOHSA-VI-NEXT: buffer_store_short v0, off, s[4:7], 0 53; GCN-NOHSA-VI-NEXT: s_endpgm 54; 55; EG-LABEL: constant_load_i16: 56; EG: ; %bb.0: ; %entry 57; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 58; EG-NEXT: TEX 0 @6 59; EG-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[] 60; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X 61; EG-NEXT: CF_END 62; EG-NEXT: PAD 63; EG-NEXT: Fetch clause starting at 6: 64; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 65; EG-NEXT: ALU clause starting at 8: 66; EG-NEXT: MOV * T0.X, KC0[2].Z, 67; EG-NEXT: ALU clause starting at 9: 68; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x, 69; EG-NEXT: AND_INT * T1.W, T0.X, literal.y, 70; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41) 71; EG-NEXT: LSHL * T0.W, PV.W, literal.x, 72; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) 73; EG-NEXT: LSHL T0.X, T1.W, PV.W, 74; EG-NEXT: LSHL * T0.W, literal.x, PV.W, 75; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 76; EG-NEXT: MOV T0.Y, 0.0, 77; EG-NEXT: MOV * T0.Z, 0.0, 78; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 79; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 80entry: 81 %ld = load i16, i16 addrspace(4)* %in 82 store i16 %ld, i16 addrspace(1)* %out 83 ret void 84} 85 86define amdgpu_kernel void @constant_load_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) { 87; GCN-NOHSA-SI-LABEL: constant_load_v2i16: 88; GCN-NOHSA-SI: ; %bb.0: ; %entry 89; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 90; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 91; GCN-NOHSA-SI-NEXT: s_load_dword s4, s[2:3], 0x0 92; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 93; 
GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 94; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 95; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 96; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 97; GCN-NOHSA-SI-NEXT: s_endpgm 98; 99; GCN-HSA-LABEL: constant_load_v2i16: 100; GCN-HSA: ; %bb.0: ; %entry 101; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 102; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 103; GCN-HSA-NEXT: s_load_dword s2, s[2:3], 0x0 104; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 105; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 106; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 107; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 108; GCN-HSA-NEXT: flat_store_dword v[0:1], v2 109; GCN-HSA-NEXT: s_endpgm 110; 111; GCN-NOHSA-VI-LABEL: constant_load_v2i16: 112; GCN-NOHSA-VI: ; %bb.0: ; %entry 113; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 114; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 115; GCN-NOHSA-VI-NEXT: s_load_dword s4, s[2:3], 0x0 116; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 117; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 118; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 119; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 120; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 121; GCN-NOHSA-VI-NEXT: s_endpgm 122; 123; EG-LABEL: constant_load_v2i16: 124; EG: ; %bb.0: ; %entry 125; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 126; EG-NEXT: TEX 0 @6 127; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 128; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 129; EG-NEXT: CF_END 130; EG-NEXT: PAD 131; EG-NEXT: Fetch clause starting at 6: 132; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1 133; EG-NEXT: ALU clause starting at 8: 134; EG-NEXT: MOV * T0.X, KC0[2].Z, 135; EG-NEXT: ALU clause starting at 9: 136; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 137; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 138entry: 139 %ld = load <2 x i16>, <2 x i16> addrspace(4)* %in 140 store <2 x i16> %ld, <2 x i16> addrspace(1)* %out 141 ret void 142} 143 144define amdgpu_kernel void @constant_load_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> 
addrspace(4)* %in) { 145; GCN-NOHSA-SI-LABEL: constant_load_v3i16: 146; GCN-NOHSA-SI: ; %bb.0: ; %entry 147; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 148; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 149; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 150; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 151; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 152; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 153; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5 154; GCN-NOHSA-SI-NEXT: buffer_store_short v0, off, s[0:3], 0 offset:4 155; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 156; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 157; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 158; GCN-NOHSA-SI-NEXT: s_endpgm 159; 160; GCN-HSA-LABEL: constant_load_v3i16: 161; GCN-HSA: ; %bb.0: ; %entry 162; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 163; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 164; GCN-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 165; GCN-HSA-NEXT: s_add_u32 s4, s0, 4 166; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 167; GCN-HSA-NEXT: v_mov_b32_e32 v2, s4 168; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 169; GCN-HSA-NEXT: v_mov_b32_e32 v3, s5 170; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 171; GCN-HSA-NEXT: v_mov_b32_e32 v4, s3 172; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 173; GCN-HSA-NEXT: v_mov_b32_e32 v5, s2 174; GCN-HSA-NEXT: flat_store_short v[2:3], v4 175; GCN-HSA-NEXT: flat_store_dword v[0:1], v5 176; GCN-HSA-NEXT: s_endpgm 177; 178; GCN-NOHSA-VI-LABEL: constant_load_v3i16: 179; GCN-NOHSA-VI: ; %bb.0: ; %entry 180; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 181; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 182; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 183; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 184; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 185; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 186; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s5 187; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s4 188; GCN-NOHSA-VI-NEXT: buffer_store_short v0, off, s[0:3], 0 offset:4 189; GCN-NOHSA-VI-NEXT: buffer_store_dword v1, 
off, s[0:3], 0 190; GCN-NOHSA-VI-NEXT: s_endpgm 191; 192; EG-LABEL: constant_load_v3i16: 193; EG: ; %bb.0: ; %entry 194; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 195; EG-NEXT: TEX 2 @6 196; EG-NEXT: ALU 19, @13, KC0[CB0:0-32], KC1[] 197; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.X, T7.X, 0 198; EG-NEXT: MEM_RAT MSKOR T5.XW, T8.X 199; EG-NEXT: CF_END 200; EG-NEXT: Fetch clause starting at 6: 201; EG-NEXT: VTX_READ_16 T6.X, T5.X, 0, #1 202; EG-NEXT: VTX_READ_16 T7.X, T5.X, 2, #1 203; EG-NEXT: VTX_READ_16 T5.X, T5.X, 4, #1 204; EG-NEXT: ALU clause starting at 12: 205; EG-NEXT: MOV * T5.X, KC0[2].Z, 206; EG-NEXT: ALU clause starting at 13: 207; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 208; EG-NEXT: 4(5.605194e-45), 0(0.000000e+00) 209; EG-NEXT: AND_INT T1.W, PV.W, literal.x, 210; EG-NEXT: AND_INT * T2.W, T5.X, literal.y, 211; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41) 212; EG-NEXT: LSHL * T1.W, PV.W, literal.x, 213; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) 214; EG-NEXT: LSHL T5.X, T2.W, PV.W, 215; EG-NEXT: LSHL * T5.W, literal.x, PV.W, 216; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 217; EG-NEXT: MOV T5.Y, 0.0, 218; EG-NEXT: MOV * T5.Z, 0.0, 219; EG-NEXT: LSHR T8.X, T0.W, literal.x, 220; EG-NEXT: LSHL T0.W, T7.X, literal.y, 221; EG-NEXT: AND_INT * T1.W, T6.X, literal.z, 222; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 223; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 224; EG-NEXT: OR_INT T6.X, PV.W, PS, 225; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.x, 226; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 227entry: 228 %ld = load <3 x i16>, <3 x i16> addrspace(4)* %in 229 store <3 x i16> %ld, <3 x i16> addrspace(1)* %out 230 ret void 231} 232 233define amdgpu_kernel void @constant_load_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) { 234; GCN-NOHSA-SI-LABEL: constant_load_v4i16: 235; GCN-NOHSA-SI: ; %bb.0: ; %entry 236; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 237; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 238; GCN-NOHSA-SI-NEXT: 
s_load_dwordx2 s[4:5], s[2:3], 0x0 239; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 240; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 241; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 242; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 243; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s5 244; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 245; GCN-NOHSA-SI-NEXT: s_endpgm 246; 247; GCN-HSA-LABEL: constant_load_v4i16: 248; GCN-HSA: ; %bb.0: ; %entry 249; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 250; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 251; GCN-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 252; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 253; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 254; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 255; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 256; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 257; GCN-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 258; GCN-HSA-NEXT: s_endpgm 259; 260; GCN-NOHSA-VI-LABEL: constant_load_v4i16: 261; GCN-NOHSA-VI: ; %bb.0: ; %entry 262; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 263; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 264; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 265; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 266; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 267; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 268; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 269; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s5 270; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 271; GCN-NOHSA-VI-NEXT: s_endpgm 272; 273; EG-LABEL: constant_load_v4i16: 274; EG: ; %bb.0: ; %entry 275; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 276; EG-NEXT: TEX 0 @6 277; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 278; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 279; EG-NEXT: CF_END 280; EG-NEXT: PAD 281; EG-NEXT: Fetch clause starting at 6: 282; EG-NEXT: VTX_READ_64 T0.XY, T0.X, 0, #1 283; EG-NEXT: ALU clause starting at 8: 284; EG-NEXT: MOV * T0.X, KC0[2].Z, 285; EG-NEXT: ALU clause starting at 9: 286; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 287; EG-NEXT: 2(2.802597e-45), 
0(0.000000e+00) 288entry: 289 %ld = load <4 x i16>, <4 x i16> addrspace(4)* %in 290 store <4 x i16> %ld, <4 x i16> addrspace(1)* %out 291 ret void 292} 293 294define amdgpu_kernel void @constant_load_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) { 295; GCN-NOHSA-SI-LABEL: constant_load_v8i16: 296; GCN-NOHSA-SI: ; %bb.0: ; %entry 297; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 298; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 299; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 300; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 301; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 302; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 303; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 304; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s5 305; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s6 306; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s7 307; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 308; GCN-NOHSA-SI-NEXT: s_endpgm 309; 310; GCN-HSA-LABEL: constant_load_v8i16: 311; GCN-HSA: ; %bb.0: ; %entry 312; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 313; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 314; GCN-HSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 315; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 316; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 317; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 318; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 319; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 320; GCN-HSA-NEXT: v_mov_b32_e32 v2, s6 321; GCN-HSA-NEXT: v_mov_b32_e32 v3, s7 322; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 323; GCN-HSA-NEXT: s_endpgm 324; 325; GCN-NOHSA-VI-LABEL: constant_load_v8i16: 326; GCN-NOHSA-VI: ; %bb.0: ; %entry 327; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 328; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 329; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 330; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 331; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 332; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 333; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 334; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s5 335; 
GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s6 336; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s7 337; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 338; GCN-NOHSA-VI-NEXT: s_endpgm 339; 340; EG-LABEL: constant_load_v8i16: 341; EG: ; %bb.0: ; %entry 342; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 343; EG-NEXT: TEX 0 @6 344; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 345; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1 346; EG-NEXT: CF_END 347; EG-NEXT: PAD 348; EG-NEXT: Fetch clause starting at 6: 349; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1 350; EG-NEXT: ALU clause starting at 8: 351; EG-NEXT: MOV * T0.X, KC0[2].Z, 352; EG-NEXT: ALU clause starting at 9: 353; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 354; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 355entry: 356 %ld = load <8 x i16>, <8 x i16> addrspace(4)* %in 357 store <8 x i16> %ld, <8 x i16> addrspace(1)* %out 358 ret void 359} 360 361define amdgpu_kernel void @constant_load_v16i16(<16 x i16> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) { 362; GCN-NOHSA-SI-LABEL: constant_load_v16i16: 363; GCN-NOHSA-SI: ; %bb.0: ; %entry 364; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x9 365; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 366; GCN-NOHSA-SI-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0 367; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, 0xf000 368; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, -1 369; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 370; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 371; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s5 372; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s6 373; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s7 374; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16 375; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 376; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s0 377; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s1 378; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s2 379; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s3 380; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 381; GCN-NOHSA-SI-NEXT: s_endpgm 382; 383; 
GCN-HSA-LABEL: constant_load_v16i16: 384; GCN-HSA: ; %bb.0: ; %entry 385; GCN-HSA-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x0 386; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 387; GCN-HSA-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0 388; GCN-HSA-NEXT: s_add_u32 s10, s8, 16 389; GCN-HSA-NEXT: s_addc_u32 s11, s9, 0 390; GCN-HSA-NEXT: v_mov_b32_e32 v6, s10 391; GCN-HSA-NEXT: v_mov_b32_e32 v7, s11 392; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 393; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 394; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 395; GCN-HSA-NEXT: v_mov_b32_e32 v2, s6 396; GCN-HSA-NEXT: v_mov_b32_e32 v3, s7 397; GCN-HSA-NEXT: flat_store_dwordx4 v[6:7], v[0:3] 398; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 399; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 400; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 401; GCN-HSA-NEXT: v_mov_b32_e32 v6, s2 402; GCN-HSA-NEXT: v_mov_b32_e32 v7, s3 403; GCN-HSA-NEXT: v_mov_b32_e32 v1, s9 404; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[4:7] 405; GCN-HSA-NEXT: s_endpgm 406; 407; GCN-NOHSA-VI-LABEL: constant_load_v16i16: 408; GCN-NOHSA-VI: ; %bb.0: ; %entry 409; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x24 410; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 411; GCN-NOHSA-VI-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0 412; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, 0xf000 413; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, -1 414; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 415; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 416; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s5 417; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s6 418; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s7 419; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, s0 420; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, s1 421; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v6, s2 422; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v7, s3 423; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16 424; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[8:11], 0 425; GCN-NOHSA-VI-NEXT: s_endpgm 426; 427; EG-LABEL: constant_load_v16i16: 428; EG: ; %bb.0: ; %entry 429; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], 
KC1[] 430; EG-NEXT: TEX 0 @8 431; EG-NEXT: ALU 3, @13, KC0[CB0:0-32], KC1[] 432; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0 433; EG-NEXT: ALU 1, @17, KC0[CB0:0-32], KC1[] 434; EG-NEXT: TEX 0 @10 435; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1 436; EG-NEXT: CF_END 437; EG-NEXT: Fetch clause starting at 8: 438; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 16, #1 439; EG-NEXT: Fetch clause starting at 10: 440; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1 441; EG-NEXT: ALU clause starting at 12: 442; EG-NEXT: MOV * T0.X, KC0[2].Z, 443; EG-NEXT: ALU clause starting at 13: 444; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 445; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 446; EG-NEXT: LSHR * T2.X, PV.W, literal.x, 447; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 448; EG-NEXT: ALU clause starting at 17: 449; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 450; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 451entry: 452 %ld = load <16 x i16>, <16 x i16> addrspace(4)* %in 453 store <16 x i16> %ld, <16 x i16> addrspace(1)* %out 454 ret void 455} 456 457define amdgpu_kernel void @constant_load_v16i16_align2(<16 x i16> addrspace(4)* %ptr0) #0 { 458; GCN-NOHSA-SI-LABEL: constant_load_v16i16_align2: 459; GCN-NOHSA-SI: ; %bb.0: ; %entry 460; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 461; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 462; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 463; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 464; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 465; GCN-NOHSA-SI-NEXT: buffer_load_ushort v1, off, s[0:3], 0 offset:2 466; GCN-NOHSA-SI-NEXT: buffer_load_ushort v4, off, s[0:3], 0 offset:4 467; GCN-NOHSA-SI-NEXT: buffer_load_ushort v2, off, s[0:3], 0 offset:6 468; GCN-NOHSA-SI-NEXT: buffer_load_ushort v5, off, s[0:3], 0 offset:8 469; GCN-NOHSA-SI-NEXT: buffer_load_ushort v3, off, s[0:3], 0 offset:10 470; GCN-NOHSA-SI-NEXT: buffer_load_ushort v6, off, s[0:3], 0 offset:12 471; GCN-NOHSA-SI-NEXT: buffer_load_ushort v7, off, s[0:3], 0 offset:14 472; 
GCN-NOHSA-SI-NEXT: buffer_load_ushort v8, off, s[0:3], 0 offset:16 473; GCN-NOHSA-SI-NEXT: buffer_load_ushort v9, off, s[0:3], 0 offset:18 474; GCN-NOHSA-SI-NEXT: buffer_load_ushort v10, off, s[0:3], 0 offset:20 475; GCN-NOHSA-SI-NEXT: buffer_load_ushort v11, off, s[0:3], 0 offset:22 476; GCN-NOHSA-SI-NEXT: buffer_load_ushort v12, off, s[0:3], 0 offset:24 477; GCN-NOHSA-SI-NEXT: buffer_load_ushort v13, off, s[0:3], 0 offset:26 478; GCN-NOHSA-SI-NEXT: buffer_load_ushort v14, off, s[0:3], 0 offset:28 479; GCN-NOHSA-SI-NEXT: buffer_load_ushort v15, off, s[0:3], 0 offset:30 480; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(8) 481; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v7, 16, v7 482; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v16, 16, v3 483; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v17, 16, v2 484; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v18, 16, v1 485; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 486; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v15, 16, v15 487; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v13, 16, v13 488; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v11, 16, v11 489; GCN-NOHSA-SI-NEXT: v_lshlrev_b32_e32 v9, 16, v9 490; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v3, v7, v6 491; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v2, v16, v5 492; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v1, v17, v4 493; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v0, v18, v0 494; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v7, v15, v14 495; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v6, v13, v12 496; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v5, v11, v10 497; GCN-NOHSA-SI-NEXT: v_or_b32_e32 v4, v9, v8 498; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 499; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 500; GCN-NOHSA-SI-NEXT: s_endpgm 501; 502; GCN-HSA-LABEL: constant_load_v16i16_align2: 503; GCN-HSA: ; %bb.0: ; %entry 504; GCN-HSA-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 505; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 506; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 507; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 508; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 509; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 
510; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 511; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 512; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 513; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] 514; GCN-HSA-NEXT: s_waitcnt vmcnt(1) 515; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[0:3] 516; GCN-HSA-NEXT: s_waitcnt vmcnt(1) 517; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[4:7] 518; GCN-HSA-NEXT: s_endpgm 519; 520; GCN-NOHSA-VI-LABEL: constant_load_v16i16_align2: 521; GCN-NOHSA-VI: ; %bb.0: ; %entry 522; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 523; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 524; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 525; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 526; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 offset:14 527; GCN-NOHSA-VI-NEXT: buffer_load_ushort v1, off, s[0:3], 0 offset:10 528; GCN-NOHSA-VI-NEXT: buffer_load_ushort v2, off, s[0:3], 0 offset:6 529; GCN-NOHSA-VI-NEXT: buffer_load_ushort v3, off, s[0:3], 0 offset:2 530; GCN-NOHSA-VI-NEXT: buffer_load_ushort v4, off, s[0:3], 0 offset:30 531; GCN-NOHSA-VI-NEXT: buffer_load_ushort v5, off, s[0:3], 0 offset:26 532; GCN-NOHSA-VI-NEXT: buffer_load_ushort v6, off, s[0:3], 0 offset:22 533; GCN-NOHSA-VI-NEXT: buffer_load_ushort v7, off, s[0:3], 0 offset:18 534; GCN-NOHSA-VI-NEXT: buffer_load_ushort v8, off, s[0:3], 0 offset:12 535; GCN-NOHSA-VI-NEXT: buffer_load_ushort v9, off, s[0:3], 0 offset:8 536; GCN-NOHSA-VI-NEXT: buffer_load_ushort v10, off, s[0:3], 0 offset:4 537; GCN-NOHSA-VI-NEXT: buffer_load_ushort v11, off, s[0:3], 0 538; GCN-NOHSA-VI-NEXT: buffer_load_ushort v12, off, s[0:3], 0 offset:28 539; GCN-NOHSA-VI-NEXT: buffer_load_ushort v13, off, s[0:3], 0 offset:24 540; GCN-NOHSA-VI-NEXT: buffer_load_ushort v14, off, s[0:3], 0 offset:20 541; GCN-NOHSA-VI-NEXT: buffer_load_ushort v15, off, s[0:3], 0 offset:16 542; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(14) 543; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 544; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 545; GCN-NOHSA-VI-NEXT: 
s_waitcnt vmcnt(13) 546; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v16, 16, v2 547; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(12) 548; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v17, 16, v3 549; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(11) 550; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v4, 16, v4 551; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(10) 552; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v5, 16, v5 553; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(9) 554; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v18, 16, v6 555; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(8) 556; GCN-NOHSA-VI-NEXT: v_lshlrev_b32_e32 v19, 16, v7 557; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(7) 558; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v3, v0, v8 559; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(6) 560; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v2, v1, v9 561; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(5) 562; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v1, v16, v10 563; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(4) 564; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v0, v17, v11 565; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(3) 566; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v7, v4, v12 567; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(2) 568; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v6, v5, v13 569; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1) 570; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v5, v18, v14 571; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 572; GCN-NOHSA-VI-NEXT: v_or_b32_e32 v4, v19, v15 573; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 574; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 575; GCN-NOHSA-VI-NEXT: s_endpgm 576; 577; EG-LABEL: constant_load_v16i16_align2: 578; EG: ; %bb.0: ; %entry 579; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 580; EG-NEXT: TEX 0 @8 581; EG-NEXT: ALU 1, @13, KC0[], KC1[] 582; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0 583; EG-NEXT: TEX 0 @10 584; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T2.X, 1 585; EG-NEXT: CF_END 586; EG-NEXT: PAD 587; EG-NEXT: Fetch clause starting at 8: 588; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 16, #1 589; EG-NEXT: Fetch clause starting at 10: 590; EG-NEXT: VTX_READ_128 
T0.XYZW, T0.X, 0, #1 591; EG-NEXT: ALU clause starting at 12: 592; EG-NEXT: MOV * T0.X, KC0[2].Y, 593; EG-NEXT: ALU clause starting at 13: 594; EG-NEXT: MOV * T2.X, literal.x, 595; EG-NEXT: 0(0.000000e+00), 0(0.000000e+00) 596entry: 597 %ld = load <16 x i16>, <16 x i16> addrspace(4)* %ptr0, align 2 598 store <16 x i16> %ld, <16 x i16> addrspace(1)* undef, align 32 599 ret void 600} 601 602define amdgpu_kernel void @constant_zextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 { 603; GCN-NOHSA-SI-LABEL: constant_zextload_i16_to_i32: 604; GCN-NOHSA-SI: ; %bb.0: 605; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 606; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 607; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 608; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 609; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 610; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 611; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 612; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 613; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 614; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 615; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 616; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 617; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 618; GCN-NOHSA-SI-NEXT: s_endpgm 619; 620; GCN-HSA-LABEL: constant_zextload_i16_to_i32: 621; GCN-HSA: ; %bb.0: 622; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 623; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 624; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 625; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 626; GCN-HSA-NEXT: flat_load_ushort v2, v[0:1] 627; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 628; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 629; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 630; GCN-HSA-NEXT: flat_store_dword v[0:1], v2 631; GCN-HSA-NEXT: s_endpgm 632; 633; GCN-NOHSA-VI-LABEL: constant_zextload_i16_to_i32: 634; GCN-NOHSA-VI: ; %bb.0: 635; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 636; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 637; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 638; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 639; 
GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 640; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 641; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 642; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 643; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 644; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 645; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 646; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 647; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[4:7], 0 648; GCN-NOHSA-VI-NEXT: s_endpgm 649; 650; EG-LABEL: constant_zextload_i16_to_i32: 651; EG: ; %bb.0: 652; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 653; EG-NEXT: TEX 0 @6 654; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 655; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 656; EG-NEXT: CF_END 657; EG-NEXT: PAD 658; EG-NEXT: Fetch clause starting at 6: 659; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 660; EG-NEXT: ALU clause starting at 8: 661; EG-NEXT: MOV * T0.X, KC0[2].Z, 662; EG-NEXT: ALU clause starting at 9: 663; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 664; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 665 %a = load i16, i16 addrspace(4)* %in 666 %ext = zext i16 %a to i32 667 store i32 %ext, i32 addrspace(1)* %out 668 ret void 669} 670 671define amdgpu_kernel void @constant_sextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(4)* %in) #0 { 672; GCN-NOHSA-SI-LABEL: constant_sextload_i16_to_i32: 673; GCN-NOHSA-SI: ; %bb.0: 674; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 675; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 676; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 677; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 678; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 679; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 680; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 681; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 682; GCN-NOHSA-SI-NEXT: buffer_load_sshort v0, off, s[8:11], 0 683; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 684; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 685; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 686; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 687; GCN-NOHSA-SI-NEXT: s_endpgm 688; 
689; GCN-HSA-LABEL: constant_sextload_i16_to_i32: 690; GCN-HSA: ; %bb.0: 691; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 692; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 693; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 694; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 695; GCN-HSA-NEXT: flat_load_sshort v2, v[0:1] 696; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 697; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 698; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 699; GCN-HSA-NEXT: flat_store_dword v[0:1], v2 700; GCN-HSA-NEXT: s_endpgm 701; 702; GCN-NOHSA-VI-LABEL: constant_sextload_i16_to_i32: 703; GCN-NOHSA-VI: ; %bb.0: 704; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 705; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 706; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 707; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 708; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 709; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 710; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 711; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 712; GCN-NOHSA-VI-NEXT: buffer_load_sshort v0, off, s[8:11], 0 713; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 714; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 715; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 716; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[4:7], 0 717; GCN-NOHSA-VI-NEXT: s_endpgm 718; 719; EG-LABEL: constant_sextload_i16_to_i32: 720; EG: ; %bb.0: 721; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 722; EG-NEXT: TEX 0 @6 723; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] 724; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 725; EG-NEXT: CF_END 726; EG-NEXT: PAD 727; EG-NEXT: Fetch clause starting at 6: 728; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 729; EG-NEXT: ALU clause starting at 8: 730; EG-NEXT: MOV * T0.X, KC0[2].Z, 731; EG-NEXT: ALU clause starting at 9: 732; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x, 733; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 734; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 735 %a = load i16, i16 addrspace(4)* %in 736 %ext = sext i16 %a to i32 737 store i32 %ext, i32 addrspace(1)* %out 738 ret void 739} 740 741define 
amdgpu_kernel void @constant_zextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 { 742; GCN-NOHSA-SI-LABEL: constant_zextload_v1i16_to_v1i32: 743; GCN-NOHSA-SI: ; %bb.0: 744; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 745; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 746; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 747; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 748; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 749; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 750; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 751; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 752; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 753; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 754; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 755; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 756; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 757; GCN-NOHSA-SI-NEXT: s_endpgm 758; 759; GCN-HSA-LABEL: constant_zextload_v1i16_to_v1i32: 760; GCN-HSA: ; %bb.0: 761; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 762; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 763; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 764; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 765; GCN-HSA-NEXT: flat_load_ushort v2, v[0:1] 766; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 767; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 768; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 769; GCN-HSA-NEXT: flat_store_dword v[0:1], v2 770; GCN-HSA-NEXT: s_endpgm 771; 772; GCN-NOHSA-VI-LABEL: constant_zextload_v1i16_to_v1i32: 773; GCN-NOHSA-VI: ; %bb.0: 774; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 775; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 776; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 777; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 778; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 779; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 780; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 781; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 782; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 783; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 784; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 785; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 786; GCN-NOHSA-VI-NEXT: 
buffer_store_dword v0, off, s[4:7], 0 787; GCN-NOHSA-VI-NEXT: s_endpgm 788; 789; EG-LABEL: constant_zextload_v1i16_to_v1i32: 790; EG: ; %bb.0: 791; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 792; EG-NEXT: TEX 0 @6 793; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 794; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 795; EG-NEXT: CF_END 796; EG-NEXT: PAD 797; EG-NEXT: Fetch clause starting at 6: 798; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 799; EG-NEXT: ALU clause starting at 8: 800; EG-NEXT: MOV * T0.X, KC0[2].Z, 801; EG-NEXT: ALU clause starting at 9: 802; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 803; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 804 %load = load <1 x i16>, <1 x i16> addrspace(4)* %in 805 %ext = zext <1 x i16> %load to <1 x i32> 806 store <1 x i32> %ext, <1 x i32> addrspace(1)* %out 807 ret void 808} 809 810define amdgpu_kernel void @constant_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 { 811; GCN-NOHSA-SI-LABEL: constant_sextload_v1i16_to_v1i32: 812; GCN-NOHSA-SI: ; %bb.0: 813; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 814; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 815; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 816; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 817; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 818; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 819; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 820; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 821; GCN-NOHSA-SI-NEXT: buffer_load_sshort v0, off, s[8:11], 0 822; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 823; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 824; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 825; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 826; GCN-NOHSA-SI-NEXT: s_endpgm 827; 828; GCN-HSA-LABEL: constant_sextload_v1i16_to_v1i32: 829; GCN-HSA: ; %bb.0: 830; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 831; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 832; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 833; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 834; GCN-HSA-NEXT: flat_load_sshort v2, v[0:1] 835; 
GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 836; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 837; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 838; GCN-HSA-NEXT: flat_store_dword v[0:1], v2 839; GCN-HSA-NEXT: s_endpgm 840; 841; GCN-NOHSA-VI-LABEL: constant_sextload_v1i16_to_v1i32: 842; GCN-NOHSA-VI: ; %bb.0: 843; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 844; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 845; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 846; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 847; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 848; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 849; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 850; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 851; GCN-NOHSA-VI-NEXT: buffer_load_sshort v0, off, s[8:11], 0 852; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 853; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 854; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 855; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[4:7], 0 856; GCN-NOHSA-VI-NEXT: s_endpgm 857; 858; EG-LABEL: constant_sextload_v1i16_to_v1i32: 859; EG: ; %bb.0: 860; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 861; EG-NEXT: TEX 0 @6 862; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] 863; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 864; EG-NEXT: CF_END 865; EG-NEXT: PAD 866; EG-NEXT: Fetch clause starting at 6: 867; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 868; EG-NEXT: ALU clause starting at 8: 869; EG-NEXT: MOV * T0.X, KC0[2].Z, 870; EG-NEXT: ALU clause starting at 9: 871; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x, 872; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 873; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 874 %load = load <1 x i16>, <1 x i16> addrspace(4)* %in 875 %ext = sext <1 x i16> %load to <1 x i32> 876 store <1 x i32> %ext, <1 x i32> addrspace(1)* %out 877 ret void 878} 879 880define amdgpu_kernel void @constant_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 { 881; GCN-NOHSA-SI-LABEL: constant_zextload_v2i16_to_v2i32: 882; GCN-NOHSA-SI: ; %bb.0: 883; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 
0x9 884; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 885; GCN-NOHSA-SI-NEXT: s_load_dword s2, s[2:3], 0x0 886; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 887; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 888; GCN-NOHSA-SI-NEXT: s_lshr_b32 s4, s2, 16 889; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s2, 0xffff 890; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 891; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5 892; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s4 893; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 894; GCN-NOHSA-SI-NEXT: s_endpgm 895; 896; GCN-HSA-LABEL: constant_zextload_v2i16_to_v2i32: 897; GCN-HSA: ; %bb.0: 898; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 899; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 900; GCN-HSA-NEXT: s_load_dword s2, s[2:3], 0x0 901; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 902; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 903; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 904; GCN-HSA-NEXT: s_lshr_b32 s0, s2, 16 905; GCN-HSA-NEXT: s_and_b32 s1, s2, 0xffff 906; GCN-HSA-NEXT: v_mov_b32_e32 v2, s1 907; GCN-HSA-NEXT: v_mov_b32_e32 v3, s0 908; GCN-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 909; GCN-HSA-NEXT: s_endpgm 910; 911; GCN-NOHSA-VI-LABEL: constant_zextload_v2i16_to_v2i32: 912; GCN-NOHSA-VI: ; %bb.0: 913; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 914; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 915; GCN-NOHSA-VI-NEXT: s_load_dword s4, s[2:3], 0x0 916; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 917; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 918; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 919; GCN-NOHSA-VI-NEXT: s_lshr_b32 s5, s4, 16 920; GCN-NOHSA-VI-NEXT: s_and_b32 s4, s4, 0xffff 921; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 922; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s5 923; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 924; GCN-NOHSA-VI-NEXT: s_endpgm 925; 926; EG-LABEL: constant_zextload_v2i16_to_v2i32: 927; EG: ; %bb.0: 928; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 929; EG-NEXT: TEX 0 @6 930; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[] 931; EG-NEXT: MEM_RAT_CACHELESS 
STORE_RAW T4.XY, T5.X, 1 932; EG-NEXT: CF_END 933; EG-NEXT: PAD 934; EG-NEXT: Fetch clause starting at 6: 935; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 936; EG-NEXT: ALU clause starting at 8: 937; EG-NEXT: MOV * T4.X, KC0[2].Z, 938; EG-NEXT: ALU clause starting at 9: 939; EG-NEXT: LSHR * T4.Y, T4.X, literal.x, 940; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 941; EG-NEXT: AND_INT T4.X, T4.X, literal.x, 942; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y, 943; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 944 %load = load <2 x i16>, <2 x i16> addrspace(4)* %in 945 %ext = zext <2 x i16> %load to <2 x i32> 946 store <2 x i32> %ext, <2 x i32> addrspace(1)* %out 947 ret void 948} 949 950; TODO: We should use ASHR instead of LSHR + BFE 951define amdgpu_kernel void @constant_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 { 952; GCN-NOHSA-SI-LABEL: constant_sextload_v2i16_to_v2i32: 953; GCN-NOHSA-SI: ; %bb.0: 954; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 955; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 956; GCN-NOHSA-SI-NEXT: s_load_dword s2, s[2:3], 0x0 957; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 958; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 959; GCN-NOHSA-SI-NEXT: s_ashr_i32 s4, s2, 16 960; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s5, s2 961; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 962; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5 963; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s4 964; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 965; GCN-NOHSA-SI-NEXT: s_endpgm 966; 967; GCN-HSA-LABEL: constant_sextload_v2i16_to_v2i32: 968; GCN-HSA: ; %bb.0: 969; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 970; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 971; GCN-HSA-NEXT: s_load_dword s2, s[2:3], 0x0 972; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 973; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 974; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 975; GCN-HSA-NEXT: s_ashr_i32 s0, s2, 16 976; GCN-HSA-NEXT: s_sext_i32_i16 s1, s2 977; GCN-HSA-NEXT: v_mov_b32_e32 v2, s1 978; 
GCN-HSA-NEXT: v_mov_b32_e32 v3, s0 979; GCN-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 980; GCN-HSA-NEXT: s_endpgm 981; 982; GCN-NOHSA-VI-LABEL: constant_sextload_v2i16_to_v2i32: 983; GCN-NOHSA-VI: ; %bb.0: 984; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 985; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 986; GCN-NOHSA-VI-NEXT: s_load_dword s4, s[2:3], 0x0 987; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 988; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 989; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 990; GCN-NOHSA-VI-NEXT: s_ashr_i32 s5, s4, 16 991; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s4, s4 992; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 993; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s5 994; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 995; GCN-NOHSA-VI-NEXT: s_endpgm 996; 997; EG-LABEL: constant_sextload_v2i16_to_v2i32: 998; EG: ; %bb.0: 999; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1000; EG-NEXT: TEX 0 @6 1001; EG-NEXT: ALU 5, @9, KC0[CB0:0-32], KC1[] 1002; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XY, T4.X, 1 1003; EG-NEXT: CF_END 1004; EG-NEXT: PAD 1005; EG-NEXT: Fetch clause starting at 6: 1006; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 1007; EG-NEXT: ALU clause starting at 8: 1008; EG-NEXT: MOV * T4.X, KC0[2].Z, 1009; EG-NEXT: ALU clause starting at 9: 1010; EG-NEXT: BFE_INT T5.X, T4.X, 0.0, literal.x, 1011; EG-NEXT: LSHR T0.W, T4.X, literal.x, 1012; EG-NEXT: LSHR * T4.X, KC0[2].Y, literal.y, 1013; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 1014; EG-NEXT: BFE_INT * T5.Y, PV.W, 0.0, literal.x, 1015; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1016 %load = load <2 x i16>, <2 x i16> addrspace(4)* %in 1017 %ext = sext <2 x i16> %load to <2 x i32> 1018 store <2 x i32> %ext, <2 x i32> addrspace(1)* %out 1019 ret void 1020} 1021 1022define amdgpu_kernel void @constant_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(4)* %in) { 1023; GCN-NOHSA-SI-LABEL: constant_zextload_v3i16_to_v3i32: 1024; GCN-NOHSA-SI: ; %bb.0: ; %entry 1025; 
GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1026; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1027; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 1028; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 1029; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 1030; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, 0xffff 1031; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1032; GCN-NOHSA-SI-NEXT: s_lshr_b32 s7, s4, 16 1033; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, s6 1034; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, s6 1035; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5 1036; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8 1037; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 1038; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 1039; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s7 1040; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1041; GCN-NOHSA-SI-NEXT: s_endpgm 1042; 1043; GCN-HSA-LABEL: constant_zextload_v3i16_to_v3i32: 1044; GCN-HSA: ; %bb.0: ; %entry 1045; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1046; GCN-HSA-NEXT: s_mov_b32 s4, 0xffff 1047; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1048; GCN-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 1049; GCN-HSA-NEXT: v_mov_b32_e32 v3, s0 1050; GCN-HSA-NEXT: v_mov_b32_e32 v4, s1 1051; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1052; GCN-HSA-NEXT: s_lshr_b32 s0, s2, 16 1053; GCN-HSA-NEXT: s_and_b32 s1, s3, s4 1054; GCN-HSA-NEXT: s_and_b32 s2, s2, s4 1055; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1056; GCN-HSA-NEXT: v_mov_b32_e32 v1, s0 1057; GCN-HSA-NEXT: v_mov_b32_e32 v2, s1 1058; GCN-HSA-NEXT: flat_store_dwordx3 v[3:4], v[0:2] 1059; GCN-HSA-NEXT: s_endpgm 1060; 1061; GCN-NOHSA-VI-LABEL: constant_zextload_v3i16_to_v3i32: 1062; GCN-NOHSA-VI: ; %bb.0: ; %entry 1063; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 1064; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, 0xffff 1065; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 1066; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 1067; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1068; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 1069; 
GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 1070; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 1071; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1072; GCN-NOHSA-VI-NEXT: s_and_b32 s0, s3, s8 1073; GCN-NOHSA-VI-NEXT: s_lshr_b32 s1, s2, 16 1074; GCN-NOHSA-VI-NEXT: s_and_b32 s2, s2, s8 1075; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s2 1076; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s1 1077; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s0 1078; GCN-NOHSA-VI-NEXT: buffer_store_dwordx3 v[0:2], off, s[4:7], 0 1079; GCN-NOHSA-VI-NEXT: s_endpgm 1080; 1081; EG-LABEL: constant_zextload_v3i16_to_v3i32: 1082; EG: ; %bb.0: ; %entry 1083; EG-NEXT: ALU 4, @12, KC0[CB0:0-32], KC1[] 1084; EG-NEXT: TEX 2 @6 1085; EG-NEXT: ALU 2, @17, KC0[], KC1[] 1086; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T4.X, 0 1087; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T3.XY, T0.X, 1 1088; EG-NEXT: CF_END 1089; EG-NEXT: Fetch clause starting at 6: 1090; EG-NEXT: VTX_READ_16 T2.X, T1.X, 4, #1 1091; EG-NEXT: VTX_READ_16 T3.X, T1.X, 0, #1 1092; EG-NEXT: VTX_READ_16 T1.X, T1.X, 2, #1 1093; EG-NEXT: ALU clause starting at 12: 1094; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x, 1095; EG-NEXT: MOV * T1.X, KC0[2].Z, 1096; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1097; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 1098; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 1099; EG-NEXT: ALU clause starting at 17: 1100; EG-NEXT: LSHR T4.X, T0.W, literal.x, 1101; EG-NEXT: MOV * T3.Y, T1.X, 1102; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1103entry: 1104 %ld = load <3 x i16>, <3 x i16> addrspace(4)* %in 1105 %ext = zext <3 x i16> %ld to <3 x i32> 1106 store <3 x i32> %ext, <3 x i32> addrspace(1)* %out 1107 ret void 1108} 1109 1110define amdgpu_kernel void @constant_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(4)* %in) { 1111; GCN-NOHSA-SI-LABEL: constant_sextload_v3i16_to_v3i32: 1112; GCN-NOHSA-SI: ; %bb.0: ; %entry 1113; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1114; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1115; GCN-NOHSA-SI-NEXT: 
s_load_dwordx2 s[4:5], s[2:3], 0x0 1116; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 1117; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 1118; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1119; GCN-NOHSA-SI-NEXT: s_ashr_i32 s6, s4, 16 1120; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s5, s5 1121; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s4, s4 1122; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5 1123; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8 1124; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 1125; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 1126; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s6 1127; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1128; GCN-NOHSA-SI-NEXT: s_endpgm 1129; 1130; GCN-HSA-LABEL: constant_sextload_v3i16_to_v3i32: 1131; GCN-HSA: ; %bb.0: ; %entry 1132; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1133; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1134; GCN-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 1135; GCN-HSA-NEXT: v_mov_b32_e32 v3, s0 1136; GCN-HSA-NEXT: v_mov_b32_e32 v4, s1 1137; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1138; GCN-HSA-NEXT: s_ashr_i32 s0, s2, 16 1139; GCN-HSA-NEXT: s_sext_i32_i16 s1, s3 1140; GCN-HSA-NEXT: s_sext_i32_i16 s2, s2 1141; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1142; GCN-HSA-NEXT: v_mov_b32_e32 v1, s0 1143; GCN-HSA-NEXT: v_mov_b32_e32 v2, s1 1144; GCN-HSA-NEXT: flat_store_dwordx3 v[3:4], v[0:2] 1145; GCN-HSA-NEXT: s_endpgm 1146; 1147; GCN-NOHSA-VI-LABEL: constant_sextload_v3i16_to_v3i32: 1148; GCN-NOHSA-VI: ; %bb.0: ; %entry 1149; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 1150; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1151; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 1152; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 1153; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 1154; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1155; GCN-NOHSA-VI-NEXT: s_ashr_i32 s6, s4, 16 1156; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s5, s5 1157; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s4, s4 1158; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 1159; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 
v1, s6 1160; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 1161; GCN-NOHSA-VI-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0 1162; GCN-NOHSA-VI-NEXT: s_endpgm 1163; 1164; EG-LABEL: constant_sextload_v3i16_to_v3i32: 1165; EG: ; %bb.0: ; %entry 1166; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 1167; EG-NEXT: TEX 2 @6 1168; EG-NEXT: ALU 9, @13, KC0[CB0:0-32], KC1[] 1169; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0 1170; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1171; EG-NEXT: CF_END 1172; EG-NEXT: Fetch clause starting at 6: 1173; EG-NEXT: VTX_READ_16 T1.X, T0.X, 2, #1 1174; EG-NEXT: VTX_READ_16 T2.X, T0.X, 4, #1 1175; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 1176; EG-NEXT: ALU clause starting at 12: 1177; EG-NEXT: MOV * T0.X, KC0[2].Z, 1178; EG-NEXT: ALU clause starting at 13: 1179; EG-NEXT: BFE_INT * T0.Y, T1.X, 0.0, literal.x, 1180; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1181; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x, 1182; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 1183; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 1184; EG-NEXT: BFE_INT T2.X, T2.X, 0.0, literal.x, 1185; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 1186; EG-NEXT: 16(2.242078e-44), 8(1.121039e-44) 1187; EG-NEXT: LSHR * T3.X, PV.W, literal.x, 1188; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1189entry: 1190 %ld = load <3 x i16>, <3 x i16> addrspace(4)* %in 1191 %ext = sext <3 x i16> %ld to <3 x i32> 1192 store <3 x i32> %ext, <3 x i32> addrspace(1)* %out 1193 ret void 1194} 1195 1196; v4i16 is naturally 8 byte aligned 1197; TODO: This should use LD, but for some there are redundant MOVs 1198define amdgpu_kernel void @constant_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 { 1199; GCN-NOHSA-SI-LABEL: constant_zextload_v4i16_to_v4i32: 1200; GCN-NOHSA-SI: ; %bb.0: 1201; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1202; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1203; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 1204; 
GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 1205; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, 0xffff 1206; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1207; GCN-NOHSA-SI-NEXT: s_lshr_b32 s6, s5, 16 1208; GCN-NOHSA-SI-NEXT: s_lshr_b32 s7, s4, 16 1209; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, s2 1210; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, s2 1211; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 1212; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 1213; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s7 1214; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 1215; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s6 1216; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1217; GCN-NOHSA-SI-NEXT: s_endpgm 1218; 1219; GCN-HSA-LABEL: constant_zextload_v4i16_to_v4i32: 1220; GCN-HSA: ; %bb.0: 1221; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1222; GCN-HSA-NEXT: s_mov_b32 s4, 0xffff 1223; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1224; GCN-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 1225; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 1226; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 1227; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1228; GCN-HSA-NEXT: s_lshr_b32 s0, s3, 16 1229; GCN-HSA-NEXT: s_lshr_b32 s1, s2, 16 1230; GCN-HSA-NEXT: s_and_b32 s3, s3, s4 1231; GCN-HSA-NEXT: s_and_b32 s2, s2, s4 1232; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1233; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 1234; GCN-HSA-NEXT: v_mov_b32_e32 v2, s3 1235; GCN-HSA-NEXT: v_mov_b32_e32 v3, s0 1236; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1237; GCN-HSA-NEXT: s_endpgm 1238; 1239; GCN-NOHSA-VI-LABEL: constant_zextload_v4i16_to_v4i32: 1240; GCN-NOHSA-VI: ; %bb.0: 1241; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 1242; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, 0xffff 1243; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 1244; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 1245; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1246; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 1247; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 1248; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 1249; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1250; 
GCN-NOHSA-VI-NEXT: s_lshr_b32 s0, s3, 16 1251; GCN-NOHSA-VI-NEXT: s_and_b32 s1, s3, s8 1252; GCN-NOHSA-VI-NEXT: s_lshr_b32 s3, s2, 16 1253; GCN-NOHSA-VI-NEXT: s_and_b32 s2, s2, s8 1254; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s2 1255; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s3 1256; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s1 1257; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s0 1258; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 1259; GCN-NOHSA-VI-NEXT: s_endpgm 1260; 1261; EG-LABEL: constant_zextload_v4i16_to_v4i32: 1262; EG: ; %bb.0: 1263; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1264; EG-NEXT: TEX 0 @6 1265; EG-NEXT: ALU 12, @9, KC0[CB0:0-32], KC1[] 1266; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1 1267; EG-NEXT: CF_END 1268; EG-NEXT: PAD 1269; EG-NEXT: Fetch clause starting at 6: 1270; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 1271; EG-NEXT: ALU clause starting at 8: 1272; EG-NEXT: MOV * T5.X, KC0[2].Z, 1273; EG-NEXT: ALU clause starting at 9: 1274; EG-NEXT: MOV T2.X, T5.X, 1275; EG-NEXT: MOV * T3.X, T5.Y, 1276; EG-NEXT: MOV T0.Y, PV.X, 1277; EG-NEXT: MOV * T0.Z, PS, 1278; EG-NEXT: LSHR * T5.W, PV.Z, literal.x, 1279; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1280; EG-NEXT: AND_INT * T5.Z, T0.Z, literal.x, 1281; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 1282; EG-NEXT: LSHR * T5.Y, T0.Y, literal.x, 1283; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1284; EG-NEXT: AND_INT T5.X, T0.Y, literal.x, 1285; EG-NEXT: LSHR * T6.X, KC0[2].Y, literal.y, 1286; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 1287 %load = load <4 x i16>, <4 x i16> addrspace(4)* %in 1288 %ext = zext <4 x i16> %load to <4 x i32> 1289 store <4 x i32> %ext, <4 x i32> addrspace(1)* %out 1290 ret void 1291} 1292 1293; v4i16 is naturally 8 byte aligned 1294; TODO: This should use LD, but for some there are redundant MOVs 1295; TODO: We should use ASHR instead of LSHR + BFE 1296define amdgpu_kernel void @constant_sextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(4)* 
%in) #0 { 1297; GCN-NOHSA-SI-LABEL: constant_sextload_v4i16_to_v4i32: 1298; GCN-NOHSA-SI: ; %bb.0: 1299; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1300; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1301; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 1302; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 1303; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1304; GCN-NOHSA-SI-NEXT: s_ashr_i32 s8, s4, 16 1305; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[6:7], s[4:5], 48 1306; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s5, s5 1307; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s4, s4 1308; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 1309; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 1310; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s8 1311; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 1312; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s6 1313; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1314; GCN-NOHSA-SI-NEXT: s_endpgm 1315; 1316; GCN-HSA-LABEL: constant_sextload_v4i16_to_v4i32: 1317; GCN-HSA: ; %bb.0: 1318; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1319; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1320; GCN-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 1321; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 1322; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 1323; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1324; GCN-HSA-NEXT: s_ashr_i64 s[0:1], s[2:3], 48 1325; GCN-HSA-NEXT: s_ashr_i32 s4, s2, 16 1326; GCN-HSA-NEXT: s_sext_i32_i16 s1, s3 1327; GCN-HSA-NEXT: s_sext_i32_i16 s2, s2 1328; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1329; GCN-HSA-NEXT: v_mov_b32_e32 v1, s4 1330; GCN-HSA-NEXT: v_mov_b32_e32 v2, s1 1331; GCN-HSA-NEXT: v_mov_b32_e32 v3, s0 1332; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1333; GCN-HSA-NEXT: s_endpgm 1334; 1335; GCN-NOHSA-VI-LABEL: constant_sextload_v4i16_to_v4i32: 1336; GCN-NOHSA-VI: ; %bb.0: 1337; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 1338; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1339; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 1340; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 1341; GCN-NOHSA-VI-NEXT: 
s_mov_b32 s2, -1 1342; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1343; GCN-NOHSA-VI-NEXT: s_ashr_i32 s6, s5, 16 1344; GCN-NOHSA-VI-NEXT: s_ashr_i32 s7, s4, 16 1345; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s5, s5 1346; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s4, s4 1347; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 1348; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s7 1349; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 1350; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s6 1351; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1352; GCN-NOHSA-VI-NEXT: s_endpgm 1353; 1354; EG-LABEL: constant_sextload_v4i16_to_v4i32: 1355; EG: ; %bb.0: 1356; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1357; EG-NEXT: TEX 0 @6 1358; EG-NEXT: ALU 14, @9, KC0[CB0:0-32], KC1[] 1359; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1 1360; EG-NEXT: CF_END 1361; EG-NEXT: PAD 1362; EG-NEXT: Fetch clause starting at 6: 1363; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 1364; EG-NEXT: ALU clause starting at 8: 1365; EG-NEXT: MOV * T5.X, KC0[2].Z, 1366; EG-NEXT: ALU clause starting at 9: 1367; EG-NEXT: MOV T2.X, T5.X, 1368; EG-NEXT: MOV * T3.X, T5.Y, 1369; EG-NEXT: MOV T0.Y, PV.X, 1370; EG-NEXT: MOV * T0.Z, PS, 1371; EG-NEXT: BFE_INT * T5.Z, PV.Z, 0.0, literal.x, 1372; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1373; EG-NEXT: BFE_INT T5.X, T0.Y, 0.0, literal.x, 1374; EG-NEXT: LSHR * T0.W, T0.Z, literal.x, 1375; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1376; EG-NEXT: BFE_INT T5.W, PV.W, 0.0, literal.x, 1377; EG-NEXT: LSHR * T0.W, T0.Y, literal.x, 1378; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1379; EG-NEXT: LSHR T6.X, KC0[2].Y, literal.x, 1380; EG-NEXT: BFE_INT * T5.Y, PS, 0.0, literal.y, 1381; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 1382 %load = load <4 x i16>, <4 x i16> addrspace(4)* %in 1383 %ext = sext <4 x i16> %load to <4 x i32> 1384 store <4 x i32> %ext, <4 x i32> addrspace(1)* %out 1385 ret void 1386} 1387 1388; v8i16 is naturally 16 byte aligned 1389; TODO: These should use LSHR instead of BFE_UINT 1390; TODO: 
This should use DST, but for some there are redundant MOVs 1391define amdgpu_kernel void @constant_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 { 1392; GCN-NOHSA-SI-LABEL: constant_zextload_v8i16_to_v8i32: 1393; GCN-NOHSA-SI: ; %bb.0: 1394; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1395; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1396; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 1397; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 1398; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 1399; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, 0xffff 1400; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1401; GCN-NOHSA-SI-NEXT: s_lshr_b32 s9, s5, 16 1402; GCN-NOHSA-SI-NEXT: s_lshr_b32 s10, s4, 16 1403; GCN-NOHSA-SI-NEXT: s_lshr_b32 s11, s7, 16 1404; GCN-NOHSA-SI-NEXT: s_lshr_b32 s12, s6, 16 1405; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, s8 1406; GCN-NOHSA-SI-NEXT: s_and_b32 s7, s7, s8 1407; GCN-NOHSA-SI-NEXT: s_and_b32 s6, s6, s8 1408; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, s8 1409; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 1410; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s12 1411; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7 1412; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s11 1413; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 1414; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 1415; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 1416; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s10 1417; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 1418; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s9 1419; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1420; GCN-NOHSA-SI-NEXT: s_endpgm 1421; 1422; GCN-HSA-LABEL: constant_zextload_v8i16_to_v8i32: 1423; GCN-HSA: ; %bb.0: 1424; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1425; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1426; GCN-HSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 1427; GCN-HSA-NEXT: s_mov_b32 s2, 0xffff 1428; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1429; GCN-HSA-NEXT: s_lshr_b32 s8, s5, 16 1430; GCN-HSA-NEXT: s_lshr_b32 s9, s4, 16 
1431; GCN-HSA-NEXT: s_lshr_b32 s3, s7, 16 1432; GCN-HSA-NEXT: s_lshr_b32 s10, s6, 16 1433; GCN-HSA-NEXT: s_and_b32 s5, s5, s2 1434; GCN-HSA-NEXT: s_and_b32 s4, s4, s2 1435; GCN-HSA-NEXT: s_and_b32 s7, s7, s2 1436; GCN-HSA-NEXT: s_and_b32 s2, s6, s2 1437; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1438; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 1439; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 1440; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 1441; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 1442; GCN-HSA-NEXT: v_mov_b32_e32 v1, s10 1443; GCN-HSA-NEXT: v_mov_b32_e32 v2, s7 1444; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 1445; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1446; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 1447; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 1448; GCN-HSA-NEXT: v_mov_b32_e32 v1, s9 1449; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5 1450; GCN-HSA-NEXT: v_mov_b32_e32 v3, s8 1451; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 1452; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1453; GCN-HSA-NEXT: s_endpgm 1454; 1455; GCN-NOHSA-VI-LABEL: constant_zextload_v8i16_to_v8i32: 1456; GCN-NOHSA-VI: ; %bb.0: 1457; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 1458; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 1459; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 1460; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1461; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[8:11], s[2:3], 0x0 1462; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, 0xffff 1463; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 1464; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 1465; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1466; GCN-NOHSA-VI-NEXT: s_lshr_b32 s0, s9, 16 1467; GCN-NOHSA-VI-NEXT: s_and_b32 s1, s9, s2 1468; GCN-NOHSA-VI-NEXT: s_lshr_b32 s3, s8, 16 1469; GCN-NOHSA-VI-NEXT: s_and_b32 s8, s8, s2 1470; GCN-NOHSA-VI-NEXT: s_lshr_b32 s9, s11, 16 1471; GCN-NOHSA-VI-NEXT: s_and_b32 s11, s11, s2 1472; GCN-NOHSA-VI-NEXT: s_lshr_b32 s12, s10, 16 1473; GCN-NOHSA-VI-NEXT: s_and_b32 s2, s10, s2 1474; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s2 1475; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s12 1476; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 
v2, s11 1477; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s9 1478; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16 1479; GCN-NOHSA-VI-NEXT: s_nop 0 1480; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8 1481; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s3 1482; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s1 1483; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s0 1484; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 1485; GCN-NOHSA-VI-NEXT: s_endpgm 1486; 1487; EG-LABEL: constant_zextload_v8i16_to_v8i32: 1488; EG: ; %bb.0: 1489; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1490; EG-NEXT: TEX 0 @6 1491; EG-NEXT: ALU 17, @9, KC0[CB0:0-32], KC1[] 1492; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0 1493; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1 1494; EG-NEXT: CF_END 1495; EG-NEXT: Fetch clause starting at 6: 1496; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 1497; EG-NEXT: ALU clause starting at 8: 1498; EG-NEXT: MOV * T7.X, KC0[2].Z, 1499; EG-NEXT: ALU clause starting at 9: 1500; EG-NEXT: LSHR * T8.W, T7.Y, literal.x, 1501; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1502; EG-NEXT: AND_INT * T8.Z, T7.Y, literal.x, 1503; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 1504; EG-NEXT: LSHR T8.Y, T7.X, literal.x, 1505; EG-NEXT: LSHR * T9.W, T7.W, literal.x, 1506; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1507; EG-NEXT: AND_INT T8.X, T7.X, literal.x, 1508; EG-NEXT: AND_INT T9.Z, T7.W, literal.x, 1509; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.y, 1510; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 1511; EG-NEXT: LSHR * T9.Y, T7.Z, literal.x, 1512; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1513; EG-NEXT: AND_INT T9.X, T7.Z, literal.x, 1514; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 1515; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 1516; EG-NEXT: LSHR * T10.X, PV.W, literal.x, 1517; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1518 %load = load <8 x i16>, <8 x i16> addrspace(4)* %in 1519 %ext = zext <8 x i16> %load to <8 x i32> 1520 store <8 x i32> %ext, <8 x 
i32> addrspace(1)* %out 1521 ret void 1522} 1523 1524; v8i16 is naturally 16 byte aligned 1525; TODO: 4 of these should use ASHR instead of LSHR + BFE_INT 1526; TODO: This should use DST, but for some there are redundant MOVs 1527define amdgpu_kernel void @constant_sextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 { 1528; GCN-NOHSA-SI-LABEL: constant_sextload_v8i16_to_v8i32: 1529; GCN-NOHSA-SI: ; %bb.0: 1530; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1531; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1532; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 1533; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 1534; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 1535; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1536; GCN-NOHSA-SI-NEXT: s_ashr_i32 s8, s5, 16 1537; GCN-NOHSA-SI-NEXT: s_ashr_i32 s9, s4, 16 1538; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s5, s5 1539; GCN-NOHSA-SI-NEXT: s_ashr_i32 s10, s7, 16 1540; GCN-NOHSA-SI-NEXT: s_ashr_i32 s11, s6, 16 1541; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s7, s7 1542; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s6, s6 1543; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s4, s4 1544; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 1545; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s11 1546; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7 1547; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s10 1548; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 1549; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 1550; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 1551; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s9 1552; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 1553; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s8 1554; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1555; GCN-NOHSA-SI-NEXT: s_endpgm 1556; 1557; GCN-HSA-LABEL: constant_sextload_v8i16_to_v8i32: 1558; GCN-HSA: ; %bb.0: 1559; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1560; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1561; GCN-HSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 1562; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 
1563; GCN-HSA-NEXT: s_ashr_i32 s8, s5, 16 1564; GCN-HSA-NEXT: s_ashr_i32 s9, s4, 16 1565; GCN-HSA-NEXT: s_ashr_i32 s2, s7, 16 1566; GCN-HSA-NEXT: s_ashr_i32 s3, s6, 16 1567; GCN-HSA-NEXT: v_mov_b32_e32 v3, s2 1568; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 1569; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1570; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 1571; GCN-HSA-NEXT: s_sext_i32_i16 s7, s7 1572; GCN-HSA-NEXT: s_sext_i32_i16 s6, s6 1573; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 1574; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 1575; GCN-HSA-NEXT: v_mov_b32_e32 v2, s7 1576; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 1577; GCN-HSA-NEXT: s_sext_i32_i16 s5, s5 1578; GCN-HSA-NEXT: s_sext_i32_i16 s4, s4 1579; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1580; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 1581; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 1582; GCN-HSA-NEXT: v_mov_b32_e32 v1, s9 1583; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5 1584; GCN-HSA-NEXT: v_mov_b32_e32 v3, s8 1585; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 1586; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1587; GCN-HSA-NEXT: s_endpgm 1588; 1589; GCN-NOHSA-VI-LABEL: constant_sextload_v8i16_to_v8i32: 1590; GCN-NOHSA-VI: ; %bb.0: 1591; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 1592; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1593; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 1594; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 1595; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 1596; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1597; GCN-NOHSA-VI-NEXT: s_ashr_i32 s10, s7, 16 1598; GCN-NOHSA-VI-NEXT: s_ashr_i32 s11, s6, 16 1599; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s7, s7 1600; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s6, s6 1601; GCN-NOHSA-VI-NEXT: s_ashr_i32 s8, s5, 16 1602; GCN-NOHSA-VI-NEXT: s_ashr_i32 s9, s4, 16 1603; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s5, s5 1604; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s4, s4 1605; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 1606; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s11 1607; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7 1608; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 
v3, s10 1609; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 1610; GCN-NOHSA-VI-NEXT: s_nop 0 1611; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 1612; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s9 1613; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 1614; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s8 1615; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1616; GCN-NOHSA-VI-NEXT: s_endpgm 1617; 1618; EG-LABEL: constant_sextload_v8i16_to_v8i32: 1619; EG: ; %bb.0: 1620; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1621; EG-NEXT: TEX 0 @6 1622; EG-NEXT: ALU 19, @9, KC0[CB0:0-32], KC1[] 1623; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0 1624; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1 1625; EG-NEXT: CF_END 1626; EG-NEXT: Fetch clause starting at 6: 1627; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 1628; EG-NEXT: ALU clause starting at 8: 1629; EG-NEXT: MOV * T7.X, KC0[2].Z, 1630; EG-NEXT: ALU clause starting at 9: 1631; EG-NEXT: BFE_INT * T8.Z, T7.Y, 0.0, literal.x, 1632; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1633; EG-NEXT: BFE_INT T8.X, T7.X, 0.0, literal.x, 1634; EG-NEXT: BFE_INT T9.Z, T7.W, 0.0, literal.x, 1635; EG-NEXT: LSHR * T0.W, T7.Y, literal.x, 1636; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1637; EG-NEXT: BFE_INT T9.X, T7.Z, 0.0, literal.x, 1638; EG-NEXT: LSHR T0.Z, T7.W, literal.x, 1639; EG-NEXT: BFE_INT T8.W, PV.W, 0.0, literal.x, 1640; EG-NEXT: LSHR * T0.W, T7.X, literal.x, 1641; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1642; EG-NEXT: LSHR T7.X, KC0[2].Y, literal.x, 1643; EG-NEXT: BFE_INT T8.Y, PS, 0.0, literal.y, 1644; EG-NEXT: LSHR T1.Z, T7.Z, literal.y, 1645; EG-NEXT: BFE_INT T9.W, PV.Z, 0.0, literal.y, 1646; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 1647; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 1648; EG-NEXT: LSHR T10.X, PS, literal.x, 1649; EG-NEXT: BFE_INT * T9.Y, PV.Z, 0.0, literal.y, 1650; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 1651 %load = load <8 x i16>, <8 x i16> addrspace(4)* %in 1652 %ext = 
sext <8 x i16> %load to <8 x i32> 1653 store <8 x i32> %ext, <8 x i32> addrspace(1)* %out 1654 ret void 1655} 1656 1657define amdgpu_kernel void @constant_zextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 { 1658; GCN-NOHSA-SI-LABEL: constant_zextload_v16i16_to_v16i32: 1659; GCN-NOHSA-SI: ; %bb.0: 1660; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1661; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1662; GCN-NOHSA-SI-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 1663; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 1664; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 1665; GCN-NOHSA-SI-NEXT: s_mov_b32 s12, 0xffff 1666; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1667; GCN-NOHSA-SI-NEXT: s_lshr_b32 s13, s5, 16 1668; GCN-NOHSA-SI-NEXT: s_lshr_b32 s14, s4, 16 1669; GCN-NOHSA-SI-NEXT: s_lshr_b32 s15, s7, 16 1670; GCN-NOHSA-SI-NEXT: s_lshr_b32 s16, s6, 16 1671; GCN-NOHSA-SI-NEXT: s_lshr_b32 s17, s9, 16 1672; GCN-NOHSA-SI-NEXT: s_lshr_b32 s18, s8, 16 1673; GCN-NOHSA-SI-NEXT: s_lshr_b32 s19, s11, 16 1674; GCN-NOHSA-SI-NEXT: s_lshr_b32 s20, s10, 16 1675; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, s12 1676; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, s12 1677; GCN-NOHSA-SI-NEXT: s_and_b32 s7, s7, s12 1678; GCN-NOHSA-SI-NEXT: s_and_b32 s6, s6, s12 1679; GCN-NOHSA-SI-NEXT: s_and_b32 s9, s9, s12 1680; GCN-NOHSA-SI-NEXT: s_and_b32 s11, s11, s12 1681; GCN-NOHSA-SI-NEXT: s_and_b32 s10, s10, s12 1682; GCN-NOHSA-SI-NEXT: s_and_b32 s8, s8, s12 1683; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10 1684; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s20 1685; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s11 1686; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s19 1687; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 1688; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 1689; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s8 1690; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s18 1691; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s9 1692; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s17 1693; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 
v[0:3], off, s[0:3], 0 offset:32 1694; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 1695; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 1696; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s16 1697; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7 1698; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s15 1699; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 1700; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 1701; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 1702; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s14 1703; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 1704; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s13 1705; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1706; GCN-NOHSA-SI-NEXT: s_endpgm 1707; 1708; GCN-HSA-LABEL: constant_zextload_v16i16_to_v16i32: 1709; GCN-HSA: ; %bb.0: 1710; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1711; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1712; GCN-HSA-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 1713; GCN-HSA-NEXT: s_mov_b32 s2, 0xffff 1714; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1715; GCN-HSA-NEXT: s_lshr_b32 s12, s5, 16 1716; GCN-HSA-NEXT: s_lshr_b32 s13, s4, 16 1717; GCN-HSA-NEXT: s_lshr_b32 s14, s7, 16 1718; GCN-HSA-NEXT: s_lshr_b32 s15, s6, 16 1719; GCN-HSA-NEXT: s_lshr_b32 s16, s9, 16 1720; GCN-HSA-NEXT: s_lshr_b32 s17, s8, 16 1721; GCN-HSA-NEXT: s_lshr_b32 s3, s11, 16 1722; GCN-HSA-NEXT: s_lshr_b32 s18, s10, 16 1723; GCN-HSA-NEXT: s_and_b32 s5, s5, s2 1724; GCN-HSA-NEXT: s_and_b32 s4, s4, s2 1725; GCN-HSA-NEXT: s_and_b32 s7, s7, s2 1726; GCN-HSA-NEXT: s_and_b32 s6, s6, s2 1727; GCN-HSA-NEXT: s_and_b32 s9, s9, s2 1728; GCN-HSA-NEXT: s_and_b32 s8, s8, s2 1729; GCN-HSA-NEXT: s_and_b32 s11, s11, s2 1730; GCN-HSA-NEXT: s_and_b32 s2, s10, s2 1731; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1732; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 1733; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 1734; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 1735; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 1736; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 1737; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 1738; GCN-HSA-NEXT: v_mov_b32_e32 v1, 
s18 1739; GCN-HSA-NEXT: v_mov_b32_e32 v2, s11 1740; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 1741; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1742; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 1743; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 1744; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 1745; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 1746; GCN-HSA-NEXT: v_mov_b32_e32 v1, s17 1747; GCN-HSA-NEXT: v_mov_b32_e32 v2, s9 1748; GCN-HSA-NEXT: v_mov_b32_e32 v3, s16 1749; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 1750; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1751; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 1752; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 1753; GCN-HSA-NEXT: v_mov_b32_e32 v1, s15 1754; GCN-HSA-NEXT: v_mov_b32_e32 v2, s7 1755; GCN-HSA-NEXT: v_mov_b32_e32 v3, s14 1756; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 1757; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1758; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 1759; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 1760; GCN-HSA-NEXT: v_mov_b32_e32 v1, s13 1761; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5 1762; GCN-HSA-NEXT: v_mov_b32_e32 v3, s12 1763; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 1764; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1765; GCN-HSA-NEXT: s_endpgm 1766; 1767; GCN-NOHSA-VI-LABEL: constant_zextload_v16i16_to_v16i32: 1768; GCN-NOHSA-VI: ; %bb.0: 1769; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x24 1770; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 1771; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 1772; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1773; GCN-NOHSA-VI-NEXT: s_load_dwordx8 s[4:11], s[14:15], 0x0 1774; GCN-NOHSA-VI-NEXT: s_mov_b32 s14, 0xffff 1775; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s12 1776; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s13 1777; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1778; GCN-NOHSA-VI-NEXT: s_lshr_b32 s19, s11, 16 1779; GCN-NOHSA-VI-NEXT: s_and_b32 s11, s11, s14 1780; GCN-NOHSA-VI-NEXT: s_lshr_b32 s20, s10, 16 1781; GCN-NOHSA-VI-NEXT: s_and_b32 s10, s10, s14 1782; GCN-NOHSA-VI-NEXT: s_lshr_b32 s17, s9, 16 1783; GCN-NOHSA-VI-NEXT: s_and_b32 s9, s9, s14 1784; 
GCN-NOHSA-VI-NEXT: s_lshr_b32 s18, s8, 16 1785; GCN-NOHSA-VI-NEXT: s_and_b32 s8, s8, s14 1786; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10 1787; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s20 1788; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s11 1789; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s19 1790; GCN-NOHSA-VI-NEXT: s_lshr_b32 s15, s7, 16 1791; GCN-NOHSA-VI-NEXT: s_and_b32 s7, s7, s14 1792; GCN-NOHSA-VI-NEXT: s_lshr_b32 s16, s6, 16 1793; GCN-NOHSA-VI-NEXT: s_and_b32 s6, s6, s14 1794; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 1795; GCN-NOHSA-VI-NEXT: s_lshr_b32 s12, s5, 16 1796; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8 1797; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s18 1798; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s9 1799; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s17 1800; GCN-NOHSA-VI-NEXT: s_and_b32 s5, s5, s14 1801; GCN-NOHSA-VI-NEXT: s_lshr_b32 s13, s4, 16 1802; GCN-NOHSA-VI-NEXT: s_and_b32 s4, s4, s14 1803; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 1804; GCN-NOHSA-VI-NEXT: s_nop 0 1805; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 1806; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s16 1807; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7 1808; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s15 1809; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 1810; GCN-NOHSA-VI-NEXT: s_nop 0 1811; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 1812; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s13 1813; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 1814; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s12 1815; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1816; GCN-NOHSA-VI-NEXT: s_endpgm 1817; 1818; EG-LABEL: constant_zextload_v16i16_to_v16i32: 1819; EG: ; %bb.0: 1820; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 1821; EG-NEXT: TEX 1 @8 1822; EG-NEXT: ALU 35, @13, KC0[CB0:0-32], KC1[] 1823; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T18.X, 0 1824; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T11.X, 0 1825; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, 
T16.X, 0 1826; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T12.X, 1 1827; EG-NEXT: CF_END 1828; EG-NEXT: Fetch clause starting at 8: 1829; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 0, #1 1830; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 16, #1 1831; EG-NEXT: ALU clause starting at 12: 1832; EG-NEXT: MOV * T11.X, KC0[2].Z, 1833; EG-NEXT: ALU clause starting at 13: 1834; EG-NEXT: LSHR * T13.W, T12.Y, literal.x, 1835; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1836; EG-NEXT: AND_INT * T13.Z, T12.Y, literal.x, 1837; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 1838; EG-NEXT: LSHR T13.Y, T12.X, literal.x, 1839; EG-NEXT: LSHR * T14.W, T12.W, literal.x, 1840; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1841; EG-NEXT: AND_INT T13.X, T12.X, literal.x, 1842; EG-NEXT: AND_INT T14.Z, T12.W, literal.x, 1843; EG-NEXT: LSHR * T12.X, KC0[2].Y, literal.y, 1844; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 1845; EG-NEXT: LSHR T14.Y, T12.Z, literal.x, 1846; EG-NEXT: LSHR * T15.W, T11.Y, literal.x, 1847; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1848; EG-NEXT: AND_INT T14.X, T12.Z, literal.x, 1849; EG-NEXT: AND_INT T15.Z, T11.Y, literal.x, 1850; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 1851; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 1852; EG-NEXT: LSHR T16.X, PV.W, literal.x, 1853; EG-NEXT: LSHR T15.Y, T11.X, literal.y, 1854; EG-NEXT: LSHR T17.W, T11.W, literal.y, 1855; EG-NEXT: AND_INT * T15.X, T11.X, literal.z, 1856; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 1857; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 1858; EG-NEXT: AND_INT T17.Z, T11.W, literal.x, 1859; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 1860; EG-NEXT: 65535(9.183409e-41), 32(4.484155e-44) 1861; EG-NEXT: LSHR T11.X, PV.W, literal.x, 1862; EG-NEXT: LSHR T17.Y, T11.Z, literal.y, 1863; EG-NEXT: AND_INT * T17.X, T11.Z, literal.z, 1864; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 1865; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 1866; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 1867; EG-NEXT: 
48(6.726233e-44), 0(0.000000e+00) 1868; EG-NEXT: LSHR * T18.X, PV.W, literal.x, 1869; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1870 %load = load <16 x i16>, <16 x i16> addrspace(4)* %in 1871 %ext = zext <16 x i16> %load to <16 x i32> 1872 store <16 x i32> %ext, <16 x i32> addrspace(1)* %out 1873 ret void 1874} 1875 1876define amdgpu_kernel void @constant_sextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 { 1877; GCN-NOHSA-SI-LABEL: constant_sextload_v16i16_to_v16i32: 1878; GCN-NOHSA-SI: ; %bb.0: 1879; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1880; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1881; GCN-NOHSA-SI-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 1882; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 1883; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 1884; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1885; GCN-NOHSA-SI-NEXT: s_ashr_i32 s12, s5, 16 1886; GCN-NOHSA-SI-NEXT: s_ashr_i32 s13, s4, 16 1887; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s5, s5 1888; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s4, s4 1889; GCN-NOHSA-SI-NEXT: s_ashr_i32 s14, s7, 16 1890; GCN-NOHSA-SI-NEXT: s_ashr_i32 s15, s6, 16 1891; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s7, s7 1892; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s6, s6 1893; GCN-NOHSA-SI-NEXT: s_ashr_i32 s16, s9, 16 1894; GCN-NOHSA-SI-NEXT: s_ashr_i32 s17, s8, 16 1895; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s9, s9 1896; GCN-NOHSA-SI-NEXT: s_ashr_i32 s18, s11, 16 1897; GCN-NOHSA-SI-NEXT: s_ashr_i32 s19, s10, 16 1898; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s11, s11 1899; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s10, s10 1900; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s8, s8 1901; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10 1902; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s19 1903; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s11 1904; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s18 1905; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 1906; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 1907; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s8 1908; GCN-NOHSA-SI-NEXT: 
v_mov_b32_e32 v1, s17 1909; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s9 1910; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s16 1911; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 1912; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 1913; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 1914; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s15 1915; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7 1916; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s14 1917; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 1918; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 1919; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 1920; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s13 1921; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 1922; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s12 1923; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 1924; GCN-NOHSA-SI-NEXT: s_endpgm 1925; 1926; GCN-HSA-LABEL: constant_sextload_v16i16_to_v16i32: 1927; GCN-HSA: ; %bb.0: 1928; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1929; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1930; GCN-HSA-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 1931; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1932; GCN-HSA-NEXT: s_ashr_i32 s12, s5, 16 1933; GCN-HSA-NEXT: s_ashr_i32 s13, s4, 16 1934; GCN-HSA-NEXT: s_ashr_i32 s14, s7, 16 1935; GCN-HSA-NEXT: s_ashr_i32 s15, s6, 16 1936; GCN-HSA-NEXT: s_ashr_i32 s16, s9, 16 1937; GCN-HSA-NEXT: s_ashr_i32 s17, s8, 16 1938; GCN-HSA-NEXT: s_ashr_i32 s2, s11, 16 1939; GCN-HSA-NEXT: s_ashr_i32 s3, s10, 16 1940; GCN-HSA-NEXT: v_mov_b32_e32 v3, s2 1941; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 1942; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1943; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 1944; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 1945; GCN-HSA-NEXT: s_sext_i32_i16 s11, s11 1946; GCN-HSA-NEXT: s_sext_i32_i16 s10, s10 1947; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 1948; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 1949; GCN-HSA-NEXT: v_mov_b32_e32 v0, s10 1950; GCN-HSA-NEXT: v_mov_b32_e32 v2, s11 1951; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 1952; GCN-HSA-NEXT: 
flat_store_dwordx4 v[4:5], v[0:3] 1953; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 1954; GCN-HSA-NEXT: s_sext_i32_i16 s9, s9 1955; GCN-HSA-NEXT: s_sext_i32_i16 s8, s8 1956; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 1957; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 1958; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 1959; GCN-HSA-NEXT: v_mov_b32_e32 v1, s17 1960; GCN-HSA-NEXT: v_mov_b32_e32 v2, s9 1961; GCN-HSA-NEXT: v_mov_b32_e32 v3, s16 1962; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 1963; GCN-HSA-NEXT: s_sext_i32_i16 s7, s7 1964; GCN-HSA-NEXT: s_sext_i32_i16 s6, s6 1965; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1966; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 1967; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 1968; GCN-HSA-NEXT: v_mov_b32_e32 v1, s15 1969; GCN-HSA-NEXT: v_mov_b32_e32 v2, s7 1970; GCN-HSA-NEXT: v_mov_b32_e32 v3, s14 1971; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 1972; GCN-HSA-NEXT: s_sext_i32_i16 s5, s5 1973; GCN-HSA-NEXT: s_sext_i32_i16 s4, s4 1974; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1975; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 1976; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 1977; GCN-HSA-NEXT: v_mov_b32_e32 v1, s13 1978; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5 1979; GCN-HSA-NEXT: v_mov_b32_e32 v3, s12 1980; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 1981; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 1982; GCN-HSA-NEXT: s_endpgm 1983; 1984; GCN-NOHSA-VI-LABEL: constant_sextload_v16i16_to_v16i32: 1985; GCN-NOHSA-VI: ; %bb.0: 1986; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 1987; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1988; GCN-NOHSA-VI-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 1989; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 1990; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 1991; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1992; GCN-NOHSA-VI-NEXT: s_ashr_i32 s18, s11, 16 1993; GCN-NOHSA-VI-NEXT: s_ashr_i32 s19, s10, 16 1994; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s11, s11 1995; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s10, s10 1996; GCN-NOHSA-VI-NEXT: s_ashr_i32 s16, s9, 16 1997; GCN-NOHSA-VI-NEXT: s_ashr_i32 s17, s8, 16 1998; 
GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s9, s9 1999; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s8, s8 2000; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10 2001; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s19 2002; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s11 2003; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s18 2004; GCN-NOHSA-VI-NEXT: s_ashr_i32 s14, s7, 16 2005; GCN-NOHSA-VI-NEXT: s_ashr_i32 s15, s6, 16 2006; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s7, s7 2007; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s6, s6 2008; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 2009; GCN-NOHSA-VI-NEXT: s_ashr_i32 s12, s5, 16 2010; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8 2011; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s17 2012; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s9 2013; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s16 2014; GCN-NOHSA-VI-NEXT: s_ashr_i32 s13, s4, 16 2015; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s5, s5 2016; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s4, s4 2017; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 2018; GCN-NOHSA-VI-NEXT: s_nop 0 2019; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 2020; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s15 2021; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7 2022; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s14 2023; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 2024; GCN-NOHSA-VI-NEXT: s_nop 0 2025; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 2026; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s13 2027; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 2028; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s12 2029; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 2030; GCN-NOHSA-VI-NEXT: s_endpgm 2031; 2032; EG-LABEL: constant_sextload_v16i16_to_v16i32: 2033; EG: ; %bb.0: 2034; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 2035; EG-NEXT: TEX 1 @8 2036; EG-NEXT: ALU 39, @13, KC0[CB0:0-32], KC1[] 2037; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T12.X, 0 2038; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T11.X, 0 2039; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T16.XYZW, 
T14.X, 0 2040; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T13.X, 1 2041; EG-NEXT: CF_END 2042; EG-NEXT: Fetch clause starting at 8: 2043; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1 2044; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1 2045; EG-NEXT: ALU clause starting at 12: 2046; EG-NEXT: MOV * T11.X, KC0[2].Z, 2047; EG-NEXT: ALU clause starting at 13: 2048; EG-NEXT: LSHR T13.X, KC0[2].Y, literal.x, 2049; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2050; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2051; EG-NEXT: LSHR T14.X, PV.W, literal.x, 2052; EG-NEXT: BFE_INT * T15.Z, T11.Y, 0.0, literal.y, 2053; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2054; EG-NEXT: BFE_INT T15.X, T11.X, 0.0, literal.x, 2055; EG-NEXT: LSHR T0.Y, T12.W, literal.x, 2056; EG-NEXT: BFE_INT T16.Z, T11.W, 0.0, literal.x, BS:VEC_120/SCL_212 2057; EG-NEXT: LSHR T0.W, T12.Y, literal.x, 2058; EG-NEXT: LSHR * T1.W, T11.Y, literal.x, 2059; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2060; EG-NEXT: BFE_INT T16.X, T11.Z, 0.0, literal.x, 2061; EG-NEXT: LSHR T1.Y, T11.W, literal.x, 2062; EG-NEXT: BFE_INT T17.Z, T12.Y, 0.0, literal.x, 2063; EG-NEXT: BFE_INT T15.W, PS, 0.0, literal.x, 2064; EG-NEXT: LSHR * T1.W, T11.X, literal.x, 2065; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2066; EG-NEXT: BFE_INT T17.X, T12.X, 0.0, literal.x, 2067; EG-NEXT: BFE_INT T15.Y, PS, 0.0, literal.x, 2068; EG-NEXT: BFE_INT T18.Z, T12.W, 0.0, literal.x, 2069; EG-NEXT: BFE_INT T16.W, PV.Y, 0.0, literal.x, 2070; EG-NEXT: LSHR * T1.W, T11.Z, literal.x, 2071; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2072; EG-NEXT: BFE_INT T18.X, T12.Z, 0.0, literal.x, 2073; EG-NEXT: BFE_INT T16.Y, PS, 0.0, literal.x, 2074; EG-NEXT: LSHR T0.Z, T12.X, literal.x, 2075; EG-NEXT: BFE_INT T17.W, T0.W, 0.0, literal.x, 2076; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2077; EG-NEXT: 16(2.242078e-44), 32(4.484155e-44) 2078; EG-NEXT: LSHR T11.X, PS, literal.x, 2079; EG-NEXT: BFE_INT T17.Y, PV.Z, 0.0, literal.y, 2080; EG-NEXT: LSHR T0.Z, T12.Z, 
literal.y, 2081; EG-NEXT: BFE_INT T18.W, T0.Y, 0.0, literal.y, 2082; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 2083; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2084; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 2085; EG-NEXT: LSHR T12.X, PS, literal.x, 2086; EG-NEXT: BFE_INT * T18.Y, PV.Z, 0.0, literal.y, 2087; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2088 %load = load <16 x i16>, <16 x i16> addrspace(4)* %in 2089 %ext = sext <16 x i16> %load to <16 x i32> 2090 store <16 x i32> %ext, <16 x i32> addrspace(1)* %out 2091 ret void 2092} 2093 2094define amdgpu_kernel void @constant_zextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 { 2095; GCN-NOHSA-SI-LABEL: constant_zextload_v32i16_to_v32i32: 2096; GCN-NOHSA-SI: ; %bb.0: 2097; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[16:19], s[0:1], 0x9 2098; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 2099; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 2100; GCN-NOHSA-SI-NEXT: s_mov_b32 s18, 0xffff 2101; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 2102; GCN-NOHSA-SI-NEXT: s_lshr_b32 s19, s1, 16 2103; GCN-NOHSA-SI-NEXT: s_lshr_b32 s20, s0, 16 2104; GCN-NOHSA-SI-NEXT: s_lshr_b32 s21, s3, 16 2105; GCN-NOHSA-SI-NEXT: s_lshr_b32 s22, s2, 16 2106; GCN-NOHSA-SI-NEXT: s_lshr_b32 s23, s5, 16 2107; GCN-NOHSA-SI-NEXT: s_lshr_b32 s24, s4, 16 2108; GCN-NOHSA-SI-NEXT: s_lshr_b32 s25, s7, 16 2109; GCN-NOHSA-SI-NEXT: s_lshr_b32 s26, s6, 16 2110; GCN-NOHSA-SI-NEXT: s_and_b32 s27, s1, s18 2111; GCN-NOHSA-SI-NEXT: s_and_b32 s28, s0, s18 2112; GCN-NOHSA-SI-NEXT: s_and_b32 s29, s3, s18 2113; GCN-NOHSA-SI-NEXT: s_and_b32 s30, s2, s18 2114; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, s18 2115; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, s18 2116; GCN-NOHSA-SI-NEXT: s_and_b32 s7, s7, s18 2117; GCN-NOHSA-SI-NEXT: s_and_b32 s6, s6, s18 2118; GCN-NOHSA-SI-NEXT: s_and_b32 s31, s9, s18 2119; GCN-NOHSA-SI-NEXT: s_and_b32 s33, s8, s18 2120; GCN-NOHSA-SI-NEXT: s_and_b32 s34, s11, s18 2121; GCN-NOHSA-SI-NEXT: s_and_b32 s35, s10, s18 2122; 
GCN-NOHSA-SI-NEXT: s_and_b32 s36, s13, s18 2123; GCN-NOHSA-SI-NEXT: s_and_b32 s37, s12, s18 2124; GCN-NOHSA-SI-NEXT: s_and_b32 s38, s15, s18 2125; GCN-NOHSA-SI-NEXT: s_and_b32 s18, s14, s18 2126; GCN-NOHSA-SI-NEXT: s_lshr_b32 s9, s9, 16 2127; GCN-NOHSA-SI-NEXT: s_lshr_b32 s8, s8, 16 2128; GCN-NOHSA-SI-NEXT: s_lshr_b32 s11, s11, 16 2129; GCN-NOHSA-SI-NEXT: s_lshr_b32 s10, s10, 16 2130; GCN-NOHSA-SI-NEXT: s_lshr_b32 s13, s13, 16 2131; GCN-NOHSA-SI-NEXT: s_lshr_b32 s12, s12, 16 2132; GCN-NOHSA-SI-NEXT: s_lshr_b32 s15, s15, 16 2133; GCN-NOHSA-SI-NEXT: s_lshr_b32 s14, s14, 16 2134; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 2135; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 2136; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s16 2137; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s17 2138; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s18 2139; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s14 2140; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s38 2141; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s15 2142; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 2143; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2144; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s37 2145; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s12 2146; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s36 2147; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s13 2148; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 2149; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2150; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s35 2151; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s10 2152; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s34 2153; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s11 2154; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 2155; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2156; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s33 2157; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s8 2158; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s31 2159; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s9 2160; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 2161; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 
2162; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 2163; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s26 2164; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7 2165; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s25 2166; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 2167; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2168; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 2169; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s24 2170; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 2171; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s23 2172; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 2173; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2174; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s30 2175; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s22 2176; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s29 2177; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s21 2178; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 2179; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2180; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s28 2181; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s20 2182; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s27 2183; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s19 2184; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 2185; GCN-NOHSA-SI-NEXT: s_endpgm 2186; 2187; GCN-HSA-LABEL: constant_zextload_v32i16_to_v32i32: 2188; GCN-HSA: ; %bb.0: 2189; GCN-HSA-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x0 2190; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 2191; GCN-HSA-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 2192; GCN-HSA-NEXT: s_mov_b32 s18, 0xffff 2193; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 2194; GCN-HSA-NEXT: s_and_b32 s19, s1, s18 2195; GCN-HSA-NEXT: s_and_b32 s20, s0, s18 2196; GCN-HSA-NEXT: s_and_b32 s21, s3, s18 2197; GCN-HSA-NEXT: s_and_b32 s22, s2, s18 2198; GCN-HSA-NEXT: s_and_b32 s23, s5, s18 2199; GCN-HSA-NEXT: s_and_b32 s24, s4, s18 2200; GCN-HSA-NEXT: s_and_b32 s25, s7, s18 2201; GCN-HSA-NEXT: s_and_b32 s26, s6, s18 2202; GCN-HSA-NEXT: s_and_b32 s27, s9, s18 2203; GCN-HSA-NEXT: s_and_b32 s28, s8, s18 2204; GCN-HSA-NEXT: 
s_and_b32 s29, s11, s18 2205; GCN-HSA-NEXT: s_and_b32 s30, s10, s18 2206; GCN-HSA-NEXT: s_and_b32 s31, s13, s18 2207; GCN-HSA-NEXT: s_and_b32 s33, s12, s18 2208; GCN-HSA-NEXT: s_and_b32 s34, s15, s18 2209; GCN-HSA-NEXT: s_and_b32 s18, s14, s18 2210; GCN-HSA-NEXT: s_lshr_b32 s35, s1, 16 2211; GCN-HSA-NEXT: s_lshr_b32 s36, s0, 16 2212; GCN-HSA-NEXT: s_lshr_b32 s3, s3, 16 2213; GCN-HSA-NEXT: s_lshr_b32 s2, s2, 16 2214; GCN-HSA-NEXT: s_lshr_b32 s5, s5, 16 2215; GCN-HSA-NEXT: s_lshr_b32 s4, s4, 16 2216; GCN-HSA-NEXT: s_lshr_b32 s7, s7, 16 2217; GCN-HSA-NEXT: s_lshr_b32 s6, s6, 16 2218; GCN-HSA-NEXT: s_lshr_b32 s9, s9, 16 2219; GCN-HSA-NEXT: s_lshr_b32 s8, s8, 16 2220; GCN-HSA-NEXT: s_lshr_b32 s11, s11, 16 2221; GCN-HSA-NEXT: s_lshr_b32 s10, s10, 16 2222; GCN-HSA-NEXT: s_lshr_b32 s13, s13, 16 2223; GCN-HSA-NEXT: s_lshr_b32 s12, s12, 16 2224; GCN-HSA-NEXT: s_lshr_b32 s15, s15, 16 2225; GCN-HSA-NEXT: s_lshr_b32 s14, s14, 16 2226; GCN-HSA-NEXT: s_add_u32 s0, s16, 0x70 2227; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 2228; GCN-HSA-NEXT: v_mov_b32_e32 v9, s1 2229; GCN-HSA-NEXT: v_mov_b32_e32 v8, s0 2230; GCN-HSA-NEXT: s_add_u32 s0, s16, 0x60 2231; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 2232; GCN-HSA-NEXT: v_mov_b32_e32 v11, s1 2233; GCN-HSA-NEXT: v_mov_b32_e32 v10, s0 2234; GCN-HSA-NEXT: s_add_u32 s0, s16, 0x50 2235; GCN-HSA-NEXT: v_mov_b32_e32 v0, s18 2236; GCN-HSA-NEXT: v_mov_b32_e32 v1, s14 2237; GCN-HSA-NEXT: v_mov_b32_e32 v2, s34 2238; GCN-HSA-NEXT: v_mov_b32_e32 v3, s15 2239; GCN-HSA-NEXT: v_mov_b32_e32 v4, s33 2240; GCN-HSA-NEXT: v_mov_b32_e32 v5, s12 2241; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 2242; GCN-HSA-NEXT: v_mov_b32_e32 v6, s31 2243; GCN-HSA-NEXT: v_mov_b32_e32 v7, s13 2244; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3] 2245; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[4:7] 2246; GCN-HSA-NEXT: v_mov_b32_e32 v0, s30 2247; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 2248; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 2249; GCN-HSA-NEXT: s_add_u32 s0, s16, 64 2250; GCN-HSA-NEXT: v_mov_b32_e32 
v1, s10 2251; GCN-HSA-NEXT: v_mov_b32_e32 v2, s29 2252; GCN-HSA-NEXT: v_mov_b32_e32 v3, s11 2253; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 2254; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2255; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 2256; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 2257; GCN-HSA-NEXT: s_add_u32 s0, s16, 48 2258; GCN-HSA-NEXT: v_mov_b32_e32 v0, s28 2259; GCN-HSA-NEXT: v_mov_b32_e32 v1, s8 2260; GCN-HSA-NEXT: v_mov_b32_e32 v2, s27 2261; GCN-HSA-NEXT: v_mov_b32_e32 v3, s9 2262; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 2263; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2264; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 2265; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 2266; GCN-HSA-NEXT: s_add_u32 s0, s16, 32 2267; GCN-HSA-NEXT: v_mov_b32_e32 v0, s26 2268; GCN-HSA-NEXT: v_mov_b32_e32 v1, s6 2269; GCN-HSA-NEXT: v_mov_b32_e32 v2, s25 2270; GCN-HSA-NEXT: v_mov_b32_e32 v3, s7 2271; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 2272; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2273; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 2274; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 2275; GCN-HSA-NEXT: s_add_u32 s0, s16, 16 2276; GCN-HSA-NEXT: v_mov_b32_e32 v0, s24 2277; GCN-HSA-NEXT: v_mov_b32_e32 v1, s4 2278; GCN-HSA-NEXT: v_mov_b32_e32 v2, s23 2279; GCN-HSA-NEXT: v_mov_b32_e32 v3, s5 2280; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 2281; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2282; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 2283; GCN-HSA-NEXT: v_mov_b32_e32 v0, s22 2284; GCN-HSA-NEXT: v_mov_b32_e32 v1, s2 2285; GCN-HSA-NEXT: v_mov_b32_e32 v2, s21 2286; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 2287; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 2288; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2289; GCN-HSA-NEXT: v_mov_b32_e32 v4, s16 2290; GCN-HSA-NEXT: v_mov_b32_e32 v0, s20 2291; GCN-HSA-NEXT: v_mov_b32_e32 v1, s36 2292; GCN-HSA-NEXT: v_mov_b32_e32 v2, s19 2293; GCN-HSA-NEXT: v_mov_b32_e32 v3, s35 2294; GCN-HSA-NEXT: v_mov_b32_e32 v5, s17 2295; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2296; GCN-HSA-NEXT: s_endpgm 2297; 2298; 
GCN-NOHSA-VI-LABEL: constant_zextload_v32i16_to_v32i32: 2299; GCN-NOHSA-VI: ; %bb.0: 2300; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[20:23], s[0:1], 0x24 2301; GCN-NOHSA-VI-NEXT: s_mov_b32 s19, 0xf000 2302; GCN-NOHSA-VI-NEXT: s_mov_b32 s18, -1 2303; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 2304; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[0:15], s[22:23], 0x0 2305; GCN-NOHSA-VI-NEXT: s_mov_b32 s22, 0xffff 2306; GCN-NOHSA-VI-NEXT: s_mov_b32 s16, s20 2307; GCN-NOHSA-VI-NEXT: s_mov_b32 s17, s21 2308; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 2309; GCN-NOHSA-VI-NEXT: s_lshr_b32 s36, s15, 16 2310; GCN-NOHSA-VI-NEXT: s_and_b32 s15, s15, s22 2311; GCN-NOHSA-VI-NEXT: s_lshr_b32 s37, s14, 16 2312; GCN-NOHSA-VI-NEXT: s_and_b32 s14, s14, s22 2313; GCN-NOHSA-VI-NEXT: s_lshr_b32 s34, s13, 16 2314; GCN-NOHSA-VI-NEXT: s_and_b32 s13, s13, s22 2315; GCN-NOHSA-VI-NEXT: s_lshr_b32 s35, s12, 16 2316; GCN-NOHSA-VI-NEXT: s_and_b32 s12, s12, s22 2317; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s14 2318; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s37 2319; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s15 2320; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s36 2321; GCN-NOHSA-VI-NEXT: s_lshr_b32 s31, s11, 16 2322; GCN-NOHSA-VI-NEXT: s_and_b32 s11, s11, s22 2323; GCN-NOHSA-VI-NEXT: s_lshr_b32 s33, s10, 16 2324; GCN-NOHSA-VI-NEXT: s_and_b32 s10, s10, s22 2325; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:112 2326; GCN-NOHSA-VI-NEXT: s_lshr_b32 s29, s9, 16 2327; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s12 2328; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s35 2329; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s13 2330; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s34 2331; GCN-NOHSA-VI-NEXT: s_and_b32 s9, s9, s22 2332; GCN-NOHSA-VI-NEXT: s_lshr_b32 s30, s8, 16 2333; GCN-NOHSA-VI-NEXT: s_and_b32 s8, s8, s22 2334; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:96 2335; GCN-NOHSA-VI-NEXT: s_lshr_b32 s27, s7, 16 2336; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10 2337; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s33 2338; 
GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s11 2339; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s31 2340; GCN-NOHSA-VI-NEXT: s_and_b32 s7, s7, s22 2341; GCN-NOHSA-VI-NEXT: s_lshr_b32 s28, s6, 16 2342; GCN-NOHSA-VI-NEXT: s_and_b32 s6, s6, s22 2343; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:80 2344; GCN-NOHSA-VI-NEXT: s_lshr_b32 s25, s5, 16 2345; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8 2346; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s30 2347; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s9 2348; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s29 2349; GCN-NOHSA-VI-NEXT: s_and_b32 s5, s5, s22 2350; GCN-NOHSA-VI-NEXT: s_lshr_b32 s26, s4, 16 2351; GCN-NOHSA-VI-NEXT: s_and_b32 s4, s4, s22 2352; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:64 2353; GCN-NOHSA-VI-NEXT: s_lshr_b32 s23, s3, 16 2354; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 2355; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s28 2356; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7 2357; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s27 2358; GCN-NOHSA-VI-NEXT: s_and_b32 s3, s3, s22 2359; GCN-NOHSA-VI-NEXT: s_lshr_b32 s24, s2, 16 2360; GCN-NOHSA-VI-NEXT: s_and_b32 s2, s2, s22 2361; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:48 2362; GCN-NOHSA-VI-NEXT: s_lshr_b32 s20, s1, 16 2363; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 2364; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s26 2365; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 2366; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s25 2367; GCN-NOHSA-VI-NEXT: s_and_b32 s1, s1, s22 2368; GCN-NOHSA-VI-NEXT: s_lshr_b32 s21, s0, 16 2369; GCN-NOHSA-VI-NEXT: s_and_b32 s0, s0, s22 2370; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:32 2371; GCN-NOHSA-VI-NEXT: s_nop 0 2372; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s2 2373; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s24 2374; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s3 2375; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s23 2376; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:16 2377; 
GCN-NOHSA-VI-NEXT: s_nop 0 2378; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s0 2379; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s21 2380; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s1 2381; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s20 2382; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 2383; GCN-NOHSA-VI-NEXT: s_endpgm 2384; 2385; EG-LABEL: constant_zextload_v32i16_to_v32i32: 2386; EG: ; %bb.0: 2387; EG-NEXT: ALU 0, @20, KC0[CB0:0-32], KC1[] 2388; EG-NEXT: TEX 3 @12 2389; EG-NEXT: ALU 71, @21, KC0[CB0:0-32], KC1[] 2390; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T34.X, 0 2391; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T21.X, 0 2392; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T32.X, 0 2393; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T22.X, 0 2394; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T29.X, 0 2395; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T19.X, 0 2396; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T26.X, 0 2397; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T20.X, 1 2398; EG-NEXT: CF_END 2399; EG-NEXT: Fetch clause starting at 12: 2400; EG-NEXT: VTX_READ_128 T20.XYZW, T19.X, 0, #1 2401; EG-NEXT: VTX_READ_128 T21.XYZW, T19.X, 48, #1 2402; EG-NEXT: VTX_READ_128 T22.XYZW, T19.X, 32, #1 2403; EG-NEXT: VTX_READ_128 T19.XYZW, T19.X, 16, #1 2404; EG-NEXT: ALU clause starting at 20: 2405; EG-NEXT: MOV * T19.X, KC0[2].Z, 2406; EG-NEXT: ALU clause starting at 21: 2407; EG-NEXT: LSHR * T23.W, T20.Y, literal.x, 2408; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2409; EG-NEXT: AND_INT * T23.Z, T20.Y, literal.x, 2410; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2411; EG-NEXT: LSHR T23.Y, T20.X, literal.x, 2412; EG-NEXT: LSHR * T24.W, T20.W, literal.x, 2413; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2414; EG-NEXT: AND_INT T23.X, T20.X, literal.x, 2415; EG-NEXT: AND_INT T24.Z, T20.W, literal.x, 2416; EG-NEXT: LSHR * T20.X, KC0[2].Y, literal.y, 2417; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 2418; EG-NEXT: LSHR T24.Y, T20.Z, literal.x, 2419; 
EG-NEXT: LSHR * T25.W, T19.Y, literal.x, 2420; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2421; EG-NEXT: AND_INT T24.X, T20.Z, literal.x, 2422; EG-NEXT: AND_INT T25.Z, T19.Y, literal.x, 2423; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2424; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 2425; EG-NEXT: LSHR T26.X, PV.W, literal.x, 2426; EG-NEXT: LSHR T25.Y, T19.X, literal.y, 2427; EG-NEXT: LSHR T27.W, T19.W, literal.y, 2428; EG-NEXT: AND_INT * T25.X, T19.X, literal.z, 2429; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2430; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2431; EG-NEXT: AND_INT T27.Z, T19.W, literal.x, 2432; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2433; EG-NEXT: 65535(9.183409e-41), 32(4.484155e-44) 2434; EG-NEXT: LSHR T19.X, PV.W, literal.x, 2435; EG-NEXT: LSHR T27.Y, T19.Z, literal.y, 2436; EG-NEXT: LSHR T28.W, T22.Y, literal.y, 2437; EG-NEXT: AND_INT * T27.X, T19.Z, literal.z, 2438; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2439; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2440; EG-NEXT: AND_INT T28.Z, T22.Y, literal.x, 2441; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2442; EG-NEXT: 65535(9.183409e-41), 48(6.726233e-44) 2443; EG-NEXT: LSHR T29.X, PV.W, literal.x, 2444; EG-NEXT: LSHR T28.Y, T22.X, literal.y, 2445; EG-NEXT: LSHR T30.W, T22.W, literal.y, 2446; EG-NEXT: AND_INT * T28.X, T22.X, literal.z, 2447; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2448; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2449; EG-NEXT: AND_INT T30.Z, T22.W, literal.x, 2450; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2451; EG-NEXT: 65535(9.183409e-41), 64(8.968310e-44) 2452; EG-NEXT: LSHR T22.X, PV.W, literal.x, 2453; EG-NEXT: LSHR T30.Y, T22.Z, literal.y, 2454; EG-NEXT: LSHR T31.W, T21.Y, literal.y, 2455; EG-NEXT: AND_INT * T30.X, T22.Z, literal.z, 2456; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2457; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2458; EG-NEXT: AND_INT T31.Z, T21.Y, literal.x, 2459; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2460; 
EG-NEXT: 65535(9.183409e-41), 80(1.121039e-43) 2461; EG-NEXT: LSHR T32.X, PV.W, literal.x, 2462; EG-NEXT: LSHR T31.Y, T21.X, literal.y, 2463; EG-NEXT: LSHR T33.W, T21.W, literal.y, 2464; EG-NEXT: AND_INT * T31.X, T21.X, literal.z, 2465; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2466; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2467; EG-NEXT: AND_INT T33.Z, T21.W, literal.x, 2468; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2469; EG-NEXT: 65535(9.183409e-41), 96(1.345247e-43) 2470; EG-NEXT: LSHR T21.X, PV.W, literal.x, 2471; EG-NEXT: LSHR T33.Y, T21.Z, literal.y, 2472; EG-NEXT: AND_INT * T33.X, T21.Z, literal.z, 2473; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2474; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2475; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 2476; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00) 2477; EG-NEXT: LSHR * T34.X, PV.W, literal.x, 2478; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2479 %load = load <32 x i16>, <32 x i16> addrspace(4)* %in 2480 %ext = zext <32 x i16> %load to <32 x i32> 2481 store <32 x i32> %ext, <32 x i32> addrspace(1)* %out 2482 ret void 2483} 2484 2485define amdgpu_kernel void @constant_sextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 { 2486; GCN-NOHSA-SI-LABEL: constant_sextload_v32i16_to_v32i32: 2487; GCN-NOHSA-SI: ; %bb.0: 2488; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[16:19], s[0:1], 0x9 2489; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 2490; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 2491; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 2492; GCN-NOHSA-SI-NEXT: s_ashr_i32 s18, s1, 16 2493; GCN-NOHSA-SI-NEXT: s_ashr_i32 s19, s0, 16 2494; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s20, s1 2495; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s21, s0 2496; GCN-NOHSA-SI-NEXT: s_ashr_i32 s22, s3, 16 2497; GCN-NOHSA-SI-NEXT: s_ashr_i32 s23, s2, 16 2498; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s24, s3 2499; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s25, s2 2500; GCN-NOHSA-SI-NEXT: s_ashr_i32 s26, s5, 16 2501; 
GCN-NOHSA-SI-NEXT: s_ashr_i32 s27, s4, 16 2502; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s5, s5 2503; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s4, s4 2504; GCN-NOHSA-SI-NEXT: s_ashr_i32 s28, s7, 16 2505; GCN-NOHSA-SI-NEXT: s_ashr_i32 s29, s6, 16 2506; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s7, s7 2507; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s6, s6 2508; GCN-NOHSA-SI-NEXT: s_ashr_i32 s30, s9, 16 2509; GCN-NOHSA-SI-NEXT: s_ashr_i32 s31, s8, 16 2510; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s9, s9 2511; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s8, s8 2512; GCN-NOHSA-SI-NEXT: s_ashr_i32 s33, s11, 16 2513; GCN-NOHSA-SI-NEXT: s_ashr_i32 s34, s10, 16 2514; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s11, s11 2515; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s10, s10 2516; GCN-NOHSA-SI-NEXT: s_ashr_i32 s35, s13, 16 2517; GCN-NOHSA-SI-NEXT: s_ashr_i32 s36, s12, 16 2518; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s13, s13 2519; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s12, s12 2520; GCN-NOHSA-SI-NEXT: s_ashr_i32 s37, s15, 16 2521; GCN-NOHSA-SI-NEXT: s_ashr_i32 s38, s14, 16 2522; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s15, s15 2523; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s14, s14 2524; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 2525; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 2526; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s16 2527; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s17 2528; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s14 2529; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s38 2530; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s15 2531; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s37 2532; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 2533; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2534; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s12 2535; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s36 2536; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s13 2537; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s35 2538; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 2539; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2540; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10 2541; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s34 
2542; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s11 2543; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s33 2544; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 2545; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2546; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s8 2547; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s31 2548; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s9 2549; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s30 2550; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 2551; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2552; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 2553; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s29 2554; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7 2555; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s28 2556; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 2557; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2558; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 2559; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s27 2560; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 2561; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s26 2562; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 2563; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2564; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s25 2565; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s23 2566; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s24 2567; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s22 2568; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 2569; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2570; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s21 2571; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s19 2572; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s20 2573; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s18 2574; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 2575; GCN-NOHSA-SI-NEXT: s_endpgm 2576; 2577; GCN-HSA-LABEL: constant_sextload_v32i16_to_v32i32: 2578; GCN-HSA: ; %bb.0: 2579; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2580; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 2581; GCN-HSA-NEXT: s_load_dwordx16 s[4:19], s[2:3], 0x0 2582; 
GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 2583; GCN-HSA-NEXT: s_ashr_i32 s20, s5, 16 2584; GCN-HSA-NEXT: s_ashr_i32 s21, s4, 16 2585; GCN-HSA-NEXT: s_ashr_i32 s22, s7, 16 2586; GCN-HSA-NEXT: s_ashr_i32 s23, s6, 16 2587; GCN-HSA-NEXT: s_ashr_i32 s24, s9, 16 2588; GCN-HSA-NEXT: s_ashr_i32 s25, s8, 16 2589; GCN-HSA-NEXT: s_ashr_i32 s26, s11, 16 2590; GCN-HSA-NEXT: s_ashr_i32 s27, s10, 16 2591; GCN-HSA-NEXT: s_ashr_i32 s28, s13, 16 2592; GCN-HSA-NEXT: s_ashr_i32 s29, s12, 16 2593; GCN-HSA-NEXT: s_ashr_i32 s30, s15, 16 2594; GCN-HSA-NEXT: s_ashr_i32 s31, s14, 16 2595; GCN-HSA-NEXT: s_ashr_i32 s33, s17, 16 2596; GCN-HSA-NEXT: s_ashr_i32 s34, s16, 16 2597; GCN-HSA-NEXT: s_ashr_i32 s35, s19, 16 2598; GCN-HSA-NEXT: s_ashr_i32 s36, s18, 16 2599; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70 2600; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2601; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 2602; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2 2603; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60 2604; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2605; GCN-HSA-NEXT: v_mov_b32_e32 v11, s3 2606; GCN-HSA-NEXT: s_sext_i32_i16 s16, s16 2607; GCN-HSA-NEXT: s_sext_i32_i16 s19, s19 2608; GCN-HSA-NEXT: s_sext_i32_i16 s18, s18 2609; GCN-HSA-NEXT: v_mov_b32_e32 v10, s2 2610; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x50 2611; GCN-HSA-NEXT: s_sext_i32_i16 s17, s17 2612; GCN-HSA-NEXT: v_mov_b32_e32 v0, s18 2613; GCN-HSA-NEXT: v_mov_b32_e32 v1, s36 2614; GCN-HSA-NEXT: v_mov_b32_e32 v2, s19 2615; GCN-HSA-NEXT: v_mov_b32_e32 v3, s35 2616; GCN-HSA-NEXT: v_mov_b32_e32 v4, s16 2617; GCN-HSA-NEXT: v_mov_b32_e32 v5, s34 2618; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2619; GCN-HSA-NEXT: v_mov_b32_e32 v6, s17 2620; GCN-HSA-NEXT: v_mov_b32_e32 v7, s33 2621; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3] 2622; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[4:7] 2623; GCN-HSA-NEXT: s_sext_i32_i16 s15, s15 2624; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 2625; GCN-HSA-NEXT: s_sext_i32_i16 s14, s14 2626; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 2627; GCN-HSA-NEXT: s_add_u32 s2, s0, 64 2628; 
GCN-HSA-NEXT: v_mov_b32_e32 v0, s14 2629; GCN-HSA-NEXT: v_mov_b32_e32 v1, s31 2630; GCN-HSA-NEXT: v_mov_b32_e32 v2, s15 2631; GCN-HSA-NEXT: v_mov_b32_e32 v3, s30 2632; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2633; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2634; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 2635; GCN-HSA-NEXT: s_sext_i32_i16 s13, s13 2636; GCN-HSA-NEXT: s_sext_i32_i16 s12, s12 2637; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 2638; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 2639; GCN-HSA-NEXT: v_mov_b32_e32 v0, s12 2640; GCN-HSA-NEXT: v_mov_b32_e32 v1, s29 2641; GCN-HSA-NEXT: v_mov_b32_e32 v2, s13 2642; GCN-HSA-NEXT: v_mov_b32_e32 v3, s28 2643; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2644; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2645; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 2646; GCN-HSA-NEXT: s_sext_i32_i16 s11, s11 2647; GCN-HSA-NEXT: s_sext_i32_i16 s10, s10 2648; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 2649; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 2650; GCN-HSA-NEXT: v_mov_b32_e32 v0, s10 2651; GCN-HSA-NEXT: v_mov_b32_e32 v1, s27 2652; GCN-HSA-NEXT: v_mov_b32_e32 v2, s11 2653; GCN-HSA-NEXT: v_mov_b32_e32 v3, s26 2654; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2655; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2656; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 2657; GCN-HSA-NEXT: s_sext_i32_i16 s9, s9 2658; GCN-HSA-NEXT: s_sext_i32_i16 s8, s8 2659; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 2660; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 2661; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 2662; GCN-HSA-NEXT: v_mov_b32_e32 v1, s25 2663; GCN-HSA-NEXT: v_mov_b32_e32 v2, s9 2664; GCN-HSA-NEXT: v_mov_b32_e32 v3, s24 2665; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2666; GCN-HSA-NEXT: s_sext_i32_i16 s7, s7 2667; GCN-HSA-NEXT: s_sext_i32_i16 s6, s6 2668; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2669; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 2670; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 2671; GCN-HSA-NEXT: v_mov_b32_e32 v1, s23 2672; GCN-HSA-NEXT: v_mov_b32_e32 v2, s7 2673; GCN-HSA-NEXT: v_mov_b32_e32 v3, s22 2674; GCN-HSA-NEXT: v_mov_b32_e32 
v4, s2 2675; GCN-HSA-NEXT: s_sext_i32_i16 s5, s5 2676; GCN-HSA-NEXT: s_sext_i32_i16 s4, s4 2677; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2678; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 2679; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 2680; GCN-HSA-NEXT: v_mov_b32_e32 v1, s21 2681; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5 2682; GCN-HSA-NEXT: v_mov_b32_e32 v3, s20 2683; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 2684; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 2685; GCN-HSA-NEXT: s_endpgm 2686; 2687; GCN-NOHSA-VI-LABEL: constant_sextload_v32i16_to_v32i32: 2688; GCN-NOHSA-VI: ; %bb.0: 2689; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[16:19], s[0:1], 0x24 2690; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 2691; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 2692; GCN-NOHSA-VI-NEXT: s_mov_b32 s19, 0xf000 2693; GCN-NOHSA-VI-NEXT: s_mov_b32 s18, -1 2694; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 2695; GCN-NOHSA-VI-NEXT: s_ashr_i32 s35, s15, 16 2696; GCN-NOHSA-VI-NEXT: s_ashr_i32 s36, s14, 16 2697; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s15, s15 2698; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s14, s14 2699; GCN-NOHSA-VI-NEXT: s_ashr_i32 s33, s13, 16 2700; GCN-NOHSA-VI-NEXT: s_ashr_i32 s34, s12, 16 2701; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s13, s13 2702; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s12, s12 2703; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s14 2704; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s36 2705; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s15 2706; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s35 2707; GCN-NOHSA-VI-NEXT: s_ashr_i32 s30, s11, 16 2708; GCN-NOHSA-VI-NEXT: s_ashr_i32 s31, s10, 16 2709; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s11, s11 2710; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s10, s10 2711; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:112 2712; GCN-NOHSA-VI-NEXT: s_ashr_i32 s28, s9, 16 2713; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s12 2714; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s34 2715; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s13 2716; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s33 2717; 
GCN-NOHSA-VI-NEXT: s_ashr_i32 s29, s8, 16 2718; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s9, s9 2719; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s8, s8 2720; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:96 2721; GCN-NOHSA-VI-NEXT: s_ashr_i32 s26, s7, 16 2722; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10 2723; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s31 2724; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s11 2725; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s30 2726; GCN-NOHSA-VI-NEXT: s_ashr_i32 s27, s6, 16 2727; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s7, s7 2728; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s6, s6 2729; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:80 2730; GCN-NOHSA-VI-NEXT: s_ashr_i32 s24, s5, 16 2731; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8 2732; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s29 2733; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s9 2734; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s28 2735; GCN-NOHSA-VI-NEXT: s_ashr_i32 s25, s4, 16 2736; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s5, s5 2737; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s4, s4 2738; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:64 2739; GCN-NOHSA-VI-NEXT: s_ashr_i32 s22, s3, 16 2740; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 2741; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s27 2742; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7 2743; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s26 2744; GCN-NOHSA-VI-NEXT: s_ashr_i32 s23, s2, 16 2745; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s3, s3 2746; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s2, s2 2747; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:48 2748; GCN-NOHSA-VI-NEXT: s_ashr_i32 s20, s1, 16 2749; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 2750; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s25 2751; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 2752; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s24 2753; GCN-NOHSA-VI-NEXT: s_ashr_i32 s21, s0, 16 2754; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s1, s1 2755; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s0, s0 2756; GCN-NOHSA-VI-NEXT: 
buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:32 2757; GCN-NOHSA-VI-NEXT: s_nop 0 2758; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s2 2759; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s23 2760; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s3 2761; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s22 2762; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:16 2763; GCN-NOHSA-VI-NEXT: s_nop 0 2764; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s0 2765; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s21 2766; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s1 2767; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s20 2768; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 2769; GCN-NOHSA-VI-NEXT: s_endpgm 2770; 2771; EG-LABEL: constant_sextload_v32i16_to_v32i32: 2772; EG: ; %bb.0: 2773; EG-NEXT: ALU 8, @20, KC0[CB0:0-32], KC1[] 2774; EG-NEXT: TEX 3 @12 2775; EG-NEXT: ALU 73, @29, KC0[CB0:0-32], KC1[] 2776; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T34.XYZW, T24.X, 0 2777; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T22.X, 0 2778; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T28.X, 0 2779; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T27.X, 0 2780; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T26.X, 0 2781; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T21.X, 0 2782; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T20.X, 0 2783; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T19.X, 1 2784; EG-NEXT: CF_END 2785; EG-NEXT: Fetch clause starting at 12: 2786; EG-NEXT: VTX_READ_128 T23.XYZW, T22.X, 16, #1 2787; EG-NEXT: VTX_READ_128 T24.XYZW, T22.X, 32, #1 2788; EG-NEXT: VTX_READ_128 T25.XYZW, T22.X, 0, #1 2789; EG-NEXT: VTX_READ_128 T22.XYZW, T22.X, 48, #1 2790; EG-NEXT: ALU clause starting at 20: 2791; EG-NEXT: LSHR T19.X, KC0[2].Y, literal.x, 2792; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2793; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2794; EG-NEXT: LSHR T20.X, PV.W, literal.x, 2795; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2796; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 2797; EG-NEXT: 
LSHR T21.X, PV.W, literal.x, 2798; EG-NEXT: MOV * T22.X, KC0[2].Z, 2799; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2800; EG-NEXT: ALU clause starting at 29: 2801; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 2802; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 2803; EG-NEXT: LSHR T26.X, PV.W, literal.x, 2804; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2805; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 2806; EG-NEXT: LSHR T27.X, PV.W, literal.x, 2807; EG-NEXT: LSHR T0.W, T22.W, literal.y, 2808; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 2809; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2810; EG-NEXT: 80(1.121039e-43), 0(0.000000e+00) 2811; EG-NEXT: LSHR T28.X, PS, literal.x, 2812; EG-NEXT: LSHR T0.Y, T22.Y, literal.y, 2813; EG-NEXT: BFE_INT T29.Z, T25.Y, 0.0, literal.y, BS:VEC_120/SCL_212 2814; EG-NEXT: LSHR T1.W, T24.W, literal.y, 2815; EG-NEXT: LSHR * T2.W, T24.Y, literal.y, 2816; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2817; EG-NEXT: BFE_INT T29.X, T25.X, 0.0, literal.x, 2818; EG-NEXT: LSHR T1.Y, T23.W, literal.x, 2819; EG-NEXT: BFE_INT T30.Z, T25.W, 0.0, literal.x, BS:VEC_120/SCL_212 2820; EG-NEXT: LSHR T3.W, T23.Y, literal.x, 2821; EG-NEXT: LSHR * T4.W, T25.Y, literal.x, 2822; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2823; EG-NEXT: BFE_INT T30.X, T25.Z, 0.0, literal.x, 2824; EG-NEXT: LSHR T2.Y, T25.W, literal.x, 2825; EG-NEXT: BFE_INT T31.Z, T23.Y, 0.0, literal.x, 2826; EG-NEXT: BFE_INT T29.W, PS, 0.0, literal.x, 2827; EG-NEXT: LSHR * T4.W, T25.X, literal.x, 2828; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2829; EG-NEXT: BFE_INT T31.X, T23.X, 0.0, literal.x, 2830; EG-NEXT: BFE_INT T29.Y, PS, 0.0, literal.x, 2831; EG-NEXT: BFE_INT T32.Z, T23.W, 0.0, literal.x, 2832; EG-NEXT: BFE_INT T30.W, PV.Y, 0.0, literal.x, 2833; EG-NEXT: LSHR * T4.W, T25.Z, literal.x, 2834; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2835; EG-NEXT: BFE_INT T32.X, T23.Z, 0.0, literal.x, 2836; EG-NEXT: BFE_INT T30.Y, PS, 0.0, literal.x, 2837; EG-NEXT: BFE_INT T25.Z, T24.Y, 0.0, literal.x, 
2838; EG-NEXT: BFE_INT T31.W, T3.W, 0.0, literal.x, 2839; EG-NEXT: LSHR * T3.W, T23.X, literal.x, 2840; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2841; EG-NEXT: BFE_INT T25.X, T24.X, 0.0, literal.x, 2842; EG-NEXT: BFE_INT T31.Y, PS, 0.0, literal.x, 2843; EG-NEXT: BFE_INT T33.Z, T24.W, 0.0, literal.x, 2844; EG-NEXT: BFE_INT T32.W, T1.Y, 0.0, literal.x, 2845; EG-NEXT: LSHR * T3.W, T23.Z, literal.x, 2846; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2847; EG-NEXT: BFE_INT T33.X, T24.Z, 0.0, literal.x, 2848; EG-NEXT: BFE_INT T32.Y, PS, 0.0, literal.x, 2849; EG-NEXT: BFE_INT T23.Z, T22.Y, 0.0, literal.x, 2850; EG-NEXT: BFE_INT T25.W, T2.W, 0.0, literal.x, 2851; EG-NEXT: LSHR * T2.W, T24.X, literal.x, 2852; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2853; EG-NEXT: BFE_INT T23.X, T22.X, 0.0, literal.x, 2854; EG-NEXT: BFE_INT T25.Y, PS, 0.0, literal.x, 2855; EG-NEXT: BFE_INT T34.Z, T22.W, 0.0, literal.x, 2856; EG-NEXT: BFE_INT T33.W, T1.W, 0.0, literal.x, BS:VEC_120/SCL_212 2857; EG-NEXT: LSHR * T1.W, T24.Z, literal.x, 2858; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2859; EG-NEXT: BFE_INT T34.X, T22.Z, 0.0, literal.x, 2860; EG-NEXT: BFE_INT T33.Y, PS, 0.0, literal.x, 2861; EG-NEXT: LSHR T0.Z, T22.X, literal.x, 2862; EG-NEXT: BFE_INT T23.W, T0.Y, 0.0, literal.x, 2863; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 2864; EG-NEXT: 16(2.242078e-44), 96(1.345247e-43) 2865; EG-NEXT: LSHR T22.X, PS, literal.x, 2866; EG-NEXT: BFE_INT T23.Y, PV.Z, 0.0, literal.y, 2867; EG-NEXT: LSHR T0.Z, T22.Z, literal.y, 2868; EG-NEXT: BFE_INT T34.W, T0.W, 0.0, literal.y, 2869; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 2870; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2871; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00) 2872; EG-NEXT: LSHR T24.X, PS, literal.x, 2873; EG-NEXT: BFE_INT * T34.Y, PV.Z, 0.0, literal.y, 2874; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2875 %load = load <32 x i16>, <32 x i16> addrspace(4)* %in 2876 %ext = sext <32 x i16> %load to <32 x i32> 2877 store <32 x i32> %ext, 
<32 x i32> addrspace(1)* %out 2878 ret void 2879} 2880 2881define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 { 2882; GCN-NOHSA-SI-LABEL: constant_zextload_v64i16_to_v64i32: 2883; GCN-NOHSA-SI: ; %bb.0: 2884; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[16:19], s[0:1], 0x9 2885; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 2886; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 2887; GCN-NOHSA-SI-NEXT: s_mov_b32 s20, 0xffff 2888; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[36:51], s[18:19], 0x10 2889; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 2890; GCN-NOHSA-SI-NEXT: s_lshr_b32 s18, s1, 16 2891; GCN-NOHSA-SI-NEXT: s_lshr_b32 s19, s0, 16 2892; GCN-NOHSA-SI-NEXT: s_lshr_b32 s21, s3, 16 2893; GCN-NOHSA-SI-NEXT: s_lshr_b32 s22, s2, 16 2894; GCN-NOHSA-SI-NEXT: s_lshr_b32 s23, s5, 16 2895; GCN-NOHSA-SI-NEXT: s_lshr_b32 s24, s4, 16 2896; GCN-NOHSA-SI-NEXT: s_lshr_b32 s25, s7, 16 2897; GCN-NOHSA-SI-NEXT: s_lshr_b32 s26, s6, 16 2898; GCN-NOHSA-SI-NEXT: s_lshr_b32 s27, s9, 16 2899; GCN-NOHSA-SI-NEXT: s_lshr_b32 s28, s8, 16 2900; GCN-NOHSA-SI-NEXT: s_lshr_b32 s29, s11, 16 2901; GCN-NOHSA-SI-NEXT: s_lshr_b32 s30, s10, 16 2902; GCN-NOHSA-SI-NEXT: s_lshr_b32 s31, s13, 16 2903; GCN-NOHSA-SI-NEXT: s_lshr_b32 s33, s12, 16 2904; GCN-NOHSA-SI-NEXT: s_lshr_b32 s34, s15, 16 2905; GCN-NOHSA-SI-NEXT: s_lshr_b32 s35, s14, 16 2906; GCN-NOHSA-SI-NEXT: s_and_b32 s52, s1, s20 2907; GCN-NOHSA-SI-NEXT: s_and_b32 s53, s0, s20 2908; GCN-NOHSA-SI-NEXT: s_and_b32 s54, s3, s20 2909; GCN-NOHSA-SI-NEXT: s_and_b32 s55, s2, s20 2910; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, s20 2911; GCN-NOHSA-SI-NEXT: s_and_b32 s4, s4, s20 2912; GCN-NOHSA-SI-NEXT: s_and_b32 s7, s7, s20 2913; GCN-NOHSA-SI-NEXT: s_and_b32 s6, s6, s20 2914; GCN-NOHSA-SI-NEXT: s_and_b32 s9, s9, s20 2915; GCN-NOHSA-SI-NEXT: s_and_b32 s8, s8, s20 2916; GCN-NOHSA-SI-NEXT: s_and_b32 s11, s11, s20 2917; GCN-NOHSA-SI-NEXT: s_and_b32 s10, s10, s20 2918; GCN-NOHSA-SI-NEXT: s_and_b32 s13, s13, 
s20 2919; GCN-NOHSA-SI-NEXT: s_and_b32 s12, s12, s20 2920; GCN-NOHSA-SI-NEXT: s_and_b32 s15, s15, s20 2921; GCN-NOHSA-SI-NEXT: s_and_b32 s14, s14, s20 2922; GCN-NOHSA-SI-NEXT: s_and_b32 s56, s37, s20 2923; GCN-NOHSA-SI-NEXT: s_and_b32 s57, s36, s20 2924; GCN-NOHSA-SI-NEXT: s_and_b32 s58, s39, s20 2925; GCN-NOHSA-SI-NEXT: s_and_b32 s59, s38, s20 2926; GCN-NOHSA-SI-NEXT: s_and_b32 s60, s41, s20 2927; GCN-NOHSA-SI-NEXT: s_and_b32 s61, s40, s20 2928; GCN-NOHSA-SI-NEXT: s_and_b32 s62, s43, s20 2929; GCN-NOHSA-SI-NEXT: s_and_b32 s63, s42, s20 2930; GCN-NOHSA-SI-NEXT: s_and_b32 s64, s45, s20 2931; GCN-NOHSA-SI-NEXT: s_and_b32 s65, s44, s20 2932; GCN-NOHSA-SI-NEXT: s_and_b32 s66, s47, s20 2933; GCN-NOHSA-SI-NEXT: s_and_b32 s67, s46, s20 2934; GCN-NOHSA-SI-NEXT: s_and_b32 s68, s49, s20 2935; GCN-NOHSA-SI-NEXT: s_and_b32 s69, s48, s20 2936; GCN-NOHSA-SI-NEXT: s_and_b32 s70, s51, s20 2937; GCN-NOHSA-SI-NEXT: s_and_b32 s20, s50, s20 2938; GCN-NOHSA-SI-NEXT: s_lshr_b32 s37, s37, 16 2939; GCN-NOHSA-SI-NEXT: s_lshr_b32 s36, s36, 16 2940; GCN-NOHSA-SI-NEXT: s_lshr_b32 s39, s39, 16 2941; GCN-NOHSA-SI-NEXT: s_lshr_b32 s38, s38, 16 2942; GCN-NOHSA-SI-NEXT: s_lshr_b32 s41, s41, 16 2943; GCN-NOHSA-SI-NEXT: s_lshr_b32 s40, s40, 16 2944; GCN-NOHSA-SI-NEXT: s_lshr_b32 s42, s42, 16 2945; GCN-NOHSA-SI-NEXT: s_lshr_b32 s45, s45, 16 2946; GCN-NOHSA-SI-NEXT: s_lshr_b32 s44, s44, 16 2947; GCN-NOHSA-SI-NEXT: s_lshr_b32 s47, s47, 16 2948; GCN-NOHSA-SI-NEXT: s_lshr_b32 s46, s46, 16 2949; GCN-NOHSA-SI-NEXT: s_lshr_b32 s49, s49, 16 2950; GCN-NOHSA-SI-NEXT: s_lshr_b32 s48, s48, 16 2951; GCN-NOHSA-SI-NEXT: s_lshr_b32 s51, s51, 16 2952; GCN-NOHSA-SI-NEXT: s_lshr_b32 s50, s50, 16 2953; GCN-NOHSA-SI-NEXT: s_lshr_b32 s43, s43, 16 2954; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s16 2955; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s17 2956; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 2957; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 2958; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s20 2959; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s50 2960; 
GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s70 2961; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s51 2962; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s69 2963; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s48 2964; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s68 2965; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s49 2966; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s67 2967; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s46 2968; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s66 2969; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s47 2970; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s65 2971; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s44 2972; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s64 2973; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s45 2974; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v16, s63 2975; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, s42 2976; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v18, s62 2977; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v20, s61 2978; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v19, s43 2979; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v21, s40 2980; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v22, s60 2981; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v23, s41 2982; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240 2983; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224 2984; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208 2985; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192 2986; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176 2987; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:160 2988; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(5) 2989; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s59 2990; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s38 2991; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s58 2992; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s39 2993; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144 2994; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 2995; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s57 2996; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s36 2997; 
GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s56 2998; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s37 2999; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128 3000; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3001; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s14 3002; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s35 3003; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s15 3004; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s34 3005; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 3006; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3007; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s12 3008; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s33 3009; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s13 3010; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s31 3011; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 3012; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3013; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10 3014; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s30 3015; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s11 3016; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s29 3017; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 3018; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3019; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s8 3020; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s28 3021; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s9 3022; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s27 3023; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 3024; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3025; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 3026; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s26 3027; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7 3028; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s25 3029; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 3030; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3031; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 3032; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s24 3033; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 3034; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s23 3035; GCN-NOHSA-SI-NEXT: 
buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 3036; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3037; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s55 3038; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s22 3039; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s54 3040; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s21 3041; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 3042; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3043; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s53 3044; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s19 3045; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s52 3046; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s18 3047; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 3048; GCN-NOHSA-SI-NEXT: s_endpgm 3049; 3050; GCN-HSA-LABEL: constant_zextload_v64i16_to_v64i32: 3051; GCN-HSA: ; %bb.0: 3052; GCN-HSA-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x0 3053; GCN-HSA-NEXT: s_mov_b32 s53, 0xffff 3054; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 3055; GCN-HSA-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 3056; GCN-HSA-NEXT: s_load_dwordx16 s[36:51], s[18:19], 0x10 3057; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 3058; GCN-HSA-NEXT: s_lshr_b32 s20, s1, 16 3059; GCN-HSA-NEXT: s_lshr_b32 s21, s0, 16 3060; GCN-HSA-NEXT: s_lshr_b32 s22, s3, 16 3061; GCN-HSA-NEXT: s_lshr_b32 s23, s2, 16 3062; GCN-HSA-NEXT: s_lshr_b32 s24, s5, 16 3063; GCN-HSA-NEXT: s_lshr_b32 s25, s4, 16 3064; GCN-HSA-NEXT: s_lshr_b32 s26, s7, 16 3065; GCN-HSA-NEXT: s_lshr_b32 s27, s6, 16 3066; GCN-HSA-NEXT: s_lshr_b32 s28, s9, 16 3067; GCN-HSA-NEXT: s_lshr_b32 s29, s8, 16 3068; GCN-HSA-NEXT: s_lshr_b32 s30, s11, 16 3069; GCN-HSA-NEXT: s_lshr_b32 s31, s10, 16 3070; GCN-HSA-NEXT: s_lshr_b32 s33, s13, 16 3071; GCN-HSA-NEXT: s_lshr_b32 s34, s12, 16 3072; GCN-HSA-NEXT: s_lshr_b32 s35, s15, 16 3073; GCN-HSA-NEXT: s_lshr_b32 s52, s14, 16 3074; GCN-HSA-NEXT: s_and_b32 s1, s1, s53 3075; GCN-HSA-NEXT: s_and_b32 s0, s0, s53 3076; GCN-HSA-NEXT: s_and_b32 s3, s3, s53 3077; GCN-HSA-NEXT: s_and_b32 s2, s2, s53 3078; GCN-HSA-NEXT: s_and_b32 s5, s5, s53 3079; 
GCN-HSA-NEXT: s_and_b32 s4, s4, s53 3080; GCN-HSA-NEXT: s_and_b32 s54, s7, s53 3081; GCN-HSA-NEXT: s_and_b32 s55, s6, s53 3082; GCN-HSA-NEXT: s_and_b32 s9, s9, s53 3083; GCN-HSA-NEXT: s_and_b32 s8, s8, s53 3084; GCN-HSA-NEXT: s_and_b32 s11, s11, s53 3085; GCN-HSA-NEXT: s_and_b32 s10, s10, s53 3086; GCN-HSA-NEXT: s_and_b32 s13, s13, s53 3087; GCN-HSA-NEXT: s_and_b32 s12, s12, s53 3088; GCN-HSA-NEXT: s_and_b32 s15, s15, s53 3089; GCN-HSA-NEXT: s_and_b32 s14, s14, s53 3090; GCN-HSA-NEXT: s_and_b32 s18, s37, s53 3091; GCN-HSA-NEXT: s_and_b32 s19, s36, s53 3092; GCN-HSA-NEXT: s_and_b32 s56, s39, s53 3093; GCN-HSA-NEXT: s_and_b32 s57, s38, s53 3094; GCN-HSA-NEXT: s_and_b32 s58, s41, s53 3095; GCN-HSA-NEXT: s_and_b32 s59, s40, s53 3096; GCN-HSA-NEXT: s_and_b32 s60, s43, s53 3097; GCN-HSA-NEXT: s_and_b32 s61, s42, s53 3098; GCN-HSA-NEXT: s_and_b32 s62, s45, s53 3099; GCN-HSA-NEXT: s_and_b32 s63, s44, s53 3100; GCN-HSA-NEXT: s_and_b32 s64, s47, s53 3101; GCN-HSA-NEXT: s_and_b32 s65, s46, s53 3102; GCN-HSA-NEXT: s_and_b32 s66, s49, s53 3103; GCN-HSA-NEXT: s_and_b32 s67, s48, s53 3104; GCN-HSA-NEXT: s_and_b32 s68, s51, s53 3105; GCN-HSA-NEXT: s_and_b32 s53, s50, s53 3106; GCN-HSA-NEXT: s_lshr_b32 s37, s37, 16 3107; GCN-HSA-NEXT: s_lshr_b32 s36, s36, 16 3108; GCN-HSA-NEXT: s_lshr_b32 s39, s39, 16 3109; GCN-HSA-NEXT: s_lshr_b32 s38, s38, 16 3110; GCN-HSA-NEXT: s_lshr_b32 s41, s41, 16 3111; GCN-HSA-NEXT: s_lshr_b32 s40, s40, 16 3112; GCN-HSA-NEXT: s_lshr_b32 s43, s43, 16 3113; GCN-HSA-NEXT: s_lshr_b32 s42, s42, 16 3114; GCN-HSA-NEXT: s_lshr_b32 s45, s45, 16 3115; GCN-HSA-NEXT: s_lshr_b32 s44, s44, 16 3116; GCN-HSA-NEXT: s_lshr_b32 s47, s47, 16 3117; GCN-HSA-NEXT: s_lshr_b32 s46, s46, 16 3118; GCN-HSA-NEXT: s_lshr_b32 s49, s49, 16 3119; GCN-HSA-NEXT: s_lshr_b32 s48, s48, 16 3120; GCN-HSA-NEXT: s_lshr_b32 s51, s51, 16 3121; GCN-HSA-NEXT: s_lshr_b32 s50, s50, 16 3122; GCN-HSA-NEXT: s_add_u32 s6, s16, 0xf0 3123; GCN-HSA-NEXT: s_addc_u32 s7, s17, 0 3124; GCN-HSA-NEXT: v_mov_b32_e32 
v22, s7 3125; GCN-HSA-NEXT: v_mov_b32_e32 v21, s6 3126; GCN-HSA-NEXT: s_add_u32 s6, s16, 0xe0 3127; GCN-HSA-NEXT: s_addc_u32 s7, s17, 0 3128; GCN-HSA-NEXT: v_mov_b32_e32 v25, s7 3129; GCN-HSA-NEXT: v_mov_b32_e32 v24, s6 3130; GCN-HSA-NEXT: s_add_u32 s6, s16, 0xd0 3131; GCN-HSA-NEXT: s_addc_u32 s7, s17, 0 3132; GCN-HSA-NEXT: v_mov_b32_e32 v27, s7 3133; GCN-HSA-NEXT: v_mov_b32_e32 v26, s6 3134; GCN-HSA-NEXT: s_add_u32 s6, s16, 0xc0 3135; GCN-HSA-NEXT: s_addc_u32 s7, s17, 0 3136; GCN-HSA-NEXT: v_mov_b32_e32 v29, s7 3137; GCN-HSA-NEXT: v_mov_b32_e32 v28, s6 3138; GCN-HSA-NEXT: s_add_u32 s6, s16, 0xb0 3139; GCN-HSA-NEXT: s_addc_u32 s7, s17, 0 3140; GCN-HSA-NEXT: v_mov_b32_e32 v31, s7 3141; GCN-HSA-NEXT: v_mov_b32_e32 v30, s6 3142; GCN-HSA-NEXT: s_add_u32 s6, s16, 0xa0 3143; GCN-HSA-NEXT: s_addc_u32 s7, s17, 0 3144; GCN-HSA-NEXT: v_mov_b32_e32 v33, s7 3145; GCN-HSA-NEXT: v_mov_b32_e32 v32, s6 3146; GCN-HSA-NEXT: s_add_u32 s6, s16, 0x90 3147; GCN-HSA-NEXT: v_mov_b32_e32 v4, s67 3148; GCN-HSA-NEXT: v_mov_b32_e32 v5, s48 3149; GCN-HSA-NEXT: v_mov_b32_e32 v6, s66 3150; GCN-HSA-NEXT: v_mov_b32_e32 v7, s49 3151; GCN-HSA-NEXT: s_addc_u32 s7, s17, 0 3152; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[4:7] 3153; GCN-HSA-NEXT: v_mov_b32_e32 v25, s7 3154; GCN-HSA-NEXT: v_mov_b32_e32 v24, s6 3155; GCN-HSA-NEXT: s_add_u32 s6, s16, 0x80 3156; GCN-HSA-NEXT: s_addc_u32 s7, s17, 0 3157; GCN-HSA-NEXT: v_mov_b32_e32 v35, s7 3158; GCN-HSA-NEXT: v_mov_b32_e32 v34, s6 3159; GCN-HSA-NEXT: s_add_u32 s6, s16, 0x70 3160; GCN-HSA-NEXT: v_mov_b32_e32 v16, s61 3161; GCN-HSA-NEXT: v_mov_b32_e32 v17, s42 3162; GCN-HSA-NEXT: v_mov_b32_e32 v18, s60 3163; GCN-HSA-NEXT: v_mov_b32_e32 v19, s43 3164; GCN-HSA-NEXT: s_addc_u32 s7, s17, 0 3165; GCN-HSA-NEXT: flat_store_dwordx4 v[30:31], v[16:19] 3166; GCN-HSA-NEXT: v_mov_b32_e32 v0, s53 3167; GCN-HSA-NEXT: v_mov_b32_e32 v17, s7 3168; GCN-HSA-NEXT: v_mov_b32_e32 v16, s6 3169; GCN-HSA-NEXT: s_add_u32 s6, s16, 0x60 3170; GCN-HSA-NEXT: s_addc_u32 s7, s17, 0 3171; 
GCN-HSA-NEXT: v_mov_b32_e32 v1, s50 3172; GCN-HSA-NEXT: v_mov_b32_e32 v2, s68 3173; GCN-HSA-NEXT: v_mov_b32_e32 v3, s51 3174; GCN-HSA-NEXT: v_mov_b32_e32 v19, s7 3175; GCN-HSA-NEXT: v_mov_b32_e32 v8, s65 3176; GCN-HSA-NEXT: v_mov_b32_e32 v9, s46 3177; GCN-HSA-NEXT: v_mov_b32_e32 v10, s64 3178; GCN-HSA-NEXT: v_mov_b32_e32 v11, s47 3179; GCN-HSA-NEXT: v_mov_b32_e32 v12, s63 3180; GCN-HSA-NEXT: v_mov_b32_e32 v13, s44 3181; GCN-HSA-NEXT: v_mov_b32_e32 v14, s62 3182; GCN-HSA-NEXT: v_mov_b32_e32 v15, s45 3183; GCN-HSA-NEXT: v_mov_b32_e32 v20, s59 3184; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[0:3] 3185; GCN-HSA-NEXT: v_mov_b32_e32 v21, s40 3186; GCN-HSA-NEXT: v_mov_b32_e32 v0, s57 3187; GCN-HSA-NEXT: v_mov_b32_e32 v22, s58 3188; GCN-HSA-NEXT: v_mov_b32_e32 v23, s41 3189; GCN-HSA-NEXT: v_mov_b32_e32 v1, s38 3190; GCN-HSA-NEXT: v_mov_b32_e32 v2, s56 3191; GCN-HSA-NEXT: v_mov_b32_e32 v4, s19 3192; GCN-HSA-NEXT: v_mov_b32_e32 v3, s39 3193; GCN-HSA-NEXT: v_mov_b32_e32 v5, s36 3194; GCN-HSA-NEXT: v_mov_b32_e32 v18, s6 3195; GCN-HSA-NEXT: s_add_u32 s6, s16, 0x50 3196; GCN-HSA-NEXT: flat_store_dwordx4 v[26:27], v[8:11] 3197; GCN-HSA-NEXT: v_mov_b32_e32 v6, s18 3198; GCN-HSA-NEXT: v_mov_b32_e32 v8, s14 3199; GCN-HSA-NEXT: flat_store_dwordx4 v[28:29], v[12:15] 3200; GCN-HSA-NEXT: v_mov_b32_e32 v7, s37 3201; GCN-HSA-NEXT: v_mov_b32_e32 v12, s12 3202; GCN-HSA-NEXT: v_mov_b32_e32 v9, s52 3203; GCN-HSA-NEXT: v_mov_b32_e32 v10, s15 3204; GCN-HSA-NEXT: v_mov_b32_e32 v11, s35 3205; GCN-HSA-NEXT: v_mov_b32_e32 v13, s34 3206; GCN-HSA-NEXT: flat_store_dwordx4 v[32:33], v[20:23] 3207; GCN-HSA-NEXT: v_mov_b32_e32 v14, s13 3208; GCN-HSA-NEXT: v_mov_b32_e32 v15, s33 3209; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[0:3] 3210; GCN-HSA-NEXT: flat_store_dwordx4 v[34:35], v[4:7] 3211; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[8:11] 3212; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[12:15] 3213; GCN-HSA-NEXT: s_addc_u32 s7, s17, 0 3214; GCN-HSA-NEXT: v_mov_b32_e32 v4, s6 3215; GCN-HSA-NEXT: 
v_mov_b32_e32 v0, s10 3216; GCN-HSA-NEXT: v_mov_b32_e32 v1, s31 3217; GCN-HSA-NEXT: v_mov_b32_e32 v2, s11 3218; GCN-HSA-NEXT: v_mov_b32_e32 v3, s30 3219; GCN-HSA-NEXT: v_mov_b32_e32 v5, s7 3220; GCN-HSA-NEXT: s_add_u32 s6, s16, 64 3221; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3222; GCN-HSA-NEXT: s_addc_u32 s7, s17, 0 3223; GCN-HSA-NEXT: v_mov_b32_e32 v4, s6 3224; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 3225; GCN-HSA-NEXT: v_mov_b32_e32 v1, s29 3226; GCN-HSA-NEXT: v_mov_b32_e32 v2, s9 3227; GCN-HSA-NEXT: v_mov_b32_e32 v3, s28 3228; GCN-HSA-NEXT: v_mov_b32_e32 v5, s7 3229; GCN-HSA-NEXT: s_add_u32 s6, s16, 48 3230; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3231; GCN-HSA-NEXT: s_addc_u32 s7, s17, 0 3232; GCN-HSA-NEXT: v_mov_b32_e32 v4, s6 3233; GCN-HSA-NEXT: v_mov_b32_e32 v0, s55 3234; GCN-HSA-NEXT: v_mov_b32_e32 v1, s27 3235; GCN-HSA-NEXT: v_mov_b32_e32 v2, s54 3236; GCN-HSA-NEXT: v_mov_b32_e32 v3, s26 3237; GCN-HSA-NEXT: v_mov_b32_e32 v5, s7 3238; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3239; GCN-HSA-NEXT: s_nop 0 3240; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 3241; GCN-HSA-NEXT: s_add_u32 s4, s16, 32 3242; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5 3243; GCN-HSA-NEXT: s_addc_u32 s5, s17, 0 3244; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 3245; GCN-HSA-NEXT: v_mov_b32_e32 v1, s25 3246; GCN-HSA-NEXT: v_mov_b32_e32 v3, s24 3247; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 3248; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3249; GCN-HSA-NEXT: s_nop 0 3250; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 3251; GCN-HSA-NEXT: s_add_u32 s2, s16, 16 3252; GCN-HSA-NEXT: v_mov_b32_e32 v2, s3 3253; GCN-HSA-NEXT: s_addc_u32 s3, s17, 0 3254; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 3255; GCN-HSA-NEXT: v_mov_b32_e32 v1, s23 3256; GCN-HSA-NEXT: v_mov_b32_e32 v3, s22 3257; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 3258; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3259; GCN-HSA-NEXT: v_mov_b32_e32 v4, s16 3260; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 3261; GCN-HSA-NEXT: v_mov_b32_e32 v1, s21 3262; GCN-HSA-NEXT: 
v_mov_b32_e32 v2, s1 3263; GCN-HSA-NEXT: v_mov_b32_e32 v3, s20 3264; GCN-HSA-NEXT: v_mov_b32_e32 v5, s17 3265; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3266; GCN-HSA-NEXT: s_endpgm 3267; 3268; GCN-NOHSA-VI-LABEL: constant_zextload_v64i16_to_v64i32: 3269; GCN-NOHSA-VI: ; %bb.0: 3270; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[36:39], s[0:1], 0x24 3271; GCN-NOHSA-VI-NEXT: s_mov_b32 s40, 0xffff 3272; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 3273; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[16:31], s[38:39], 0x0 3274; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[0:15], s[38:39], 0x40 3275; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 3276; GCN-NOHSA-VI-NEXT: s_lshr_b32 s33, s17, 16 3277; GCN-NOHSA-VI-NEXT: s_and_b32 s17, s17, s40 3278; GCN-NOHSA-VI-NEXT: s_lshr_b32 s34, s16, 16 3279; GCN-NOHSA-VI-NEXT: s_and_b32 s16, s16, s40 3280; GCN-NOHSA-VI-NEXT: s_lshr_b32 s35, s19, 16 3281; GCN-NOHSA-VI-NEXT: s_and_b32 s19, s19, s40 3282; GCN-NOHSA-VI-NEXT: s_lshr_b32 s38, s18, 16 3283; GCN-NOHSA-VI-NEXT: s_and_b32 s18, s18, s40 3284; GCN-NOHSA-VI-NEXT: s_lshr_b32 s39, s21, 16 3285; GCN-NOHSA-VI-NEXT: s_and_b32 s21, s21, s40 3286; GCN-NOHSA-VI-NEXT: s_lshr_b32 s41, s20, 16 3287; GCN-NOHSA-VI-NEXT: s_and_b32 s20, s20, s40 3288; GCN-NOHSA-VI-NEXT: s_lshr_b32 s42, s23, 16 3289; GCN-NOHSA-VI-NEXT: s_and_b32 s23, s23, s40 3290; GCN-NOHSA-VI-NEXT: s_lshr_b32 s43, s22, 16 3291; GCN-NOHSA-VI-NEXT: s_and_b32 s22, s22, s40 3292; GCN-NOHSA-VI-NEXT: s_lshr_b32 s44, s25, 16 3293; GCN-NOHSA-VI-NEXT: s_and_b32 s25, s25, s40 3294; GCN-NOHSA-VI-NEXT: s_lshr_b32 s45, s24, 16 3295; GCN-NOHSA-VI-NEXT: s_and_b32 s24, s24, s40 3296; GCN-NOHSA-VI-NEXT: s_lshr_b32 s46, s27, 16 3297; GCN-NOHSA-VI-NEXT: s_and_b32 s27, s27, s40 3298; GCN-NOHSA-VI-NEXT: s_lshr_b32 s47, s26, 16 3299; GCN-NOHSA-VI-NEXT: s_and_b32 s26, s26, s40 3300; GCN-NOHSA-VI-NEXT: s_lshr_b32 s48, s29, 16 3301; GCN-NOHSA-VI-NEXT: s_and_b32 s29, s29, s40 3302; GCN-NOHSA-VI-NEXT: s_lshr_b32 s49, s28, 16 3303; GCN-NOHSA-VI-NEXT: s_and_b32 s28, s28, s40 3304; 
GCN-NOHSA-VI-NEXT: s_lshr_b32 s50, s31, 16 3305; GCN-NOHSA-VI-NEXT: s_and_b32 s31, s31, s40 3306; GCN-NOHSA-VI-NEXT: s_lshr_b32 s51, s30, 16 3307; GCN-NOHSA-VI-NEXT: s_and_b32 s30, s30, s40 3308; GCN-NOHSA-VI-NEXT: s_and_b32 s53, s1, s40 3309; GCN-NOHSA-VI-NEXT: s_and_b32 s55, s0, s40 3310; GCN-NOHSA-VI-NEXT: s_and_b32 s57, s3, s40 3311; GCN-NOHSA-VI-NEXT: s_and_b32 s59, s2, s40 3312; GCN-NOHSA-VI-NEXT: s_lshr_b32 s60, s5, 16 3313; GCN-NOHSA-VI-NEXT: s_and_b32 s5, s5, s40 3314; GCN-NOHSA-VI-NEXT: s_lshr_b32 s61, s4, 16 3315; GCN-NOHSA-VI-NEXT: s_and_b32 s4, s4, s40 3316; GCN-NOHSA-VI-NEXT: s_lshr_b32 s62, s7, 16 3317; GCN-NOHSA-VI-NEXT: s_and_b32 s7, s7, s40 3318; GCN-NOHSA-VI-NEXT: s_lshr_b32 s63, s6, 16 3319; GCN-NOHSA-VI-NEXT: s_and_b32 s6, s6, s40 3320; GCN-NOHSA-VI-NEXT: s_and_b32 s64, s9, s40 3321; GCN-NOHSA-VI-NEXT: s_and_b32 s65, s8, s40 3322; GCN-NOHSA-VI-NEXT: s_and_b32 s66, s11, s40 3323; GCN-NOHSA-VI-NEXT: s_and_b32 s67, s10, s40 3324; GCN-NOHSA-VI-NEXT: s_and_b32 s68, s13, s40 3325; GCN-NOHSA-VI-NEXT: s_and_b32 s69, s12, s40 3326; GCN-NOHSA-VI-NEXT: s_and_b32 s70, s15, s40 3327; GCN-NOHSA-VI-NEXT: s_and_b32 s40, s14, s40 3328; GCN-NOHSA-VI-NEXT: s_lshr_b32 s15, s15, 16 3329; GCN-NOHSA-VI-NEXT: s_lshr_b32 s14, s14, 16 3330; GCN-NOHSA-VI-NEXT: s_lshr_b32 s52, s1, 16 3331; GCN-NOHSA-VI-NEXT: s_lshr_b32 s54, s0, 16 3332; GCN-NOHSA-VI-NEXT: s_lshr_b32 s56, s3, 16 3333; GCN-NOHSA-VI-NEXT: s_lshr_b32 s58, s2, 16 3334; GCN-NOHSA-VI-NEXT: s_lshr_b32 s13, s13, 16 3335; GCN-NOHSA-VI-NEXT: s_lshr_b32 s12, s12, 16 3336; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 3337; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 3338; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s36 3339; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s37 3340; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s40 3341; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s14 3342; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s70 3343; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s15 3344; GCN-NOHSA-VI-NEXT: s_lshr_b32 s11, s11, 16 3345; GCN-NOHSA-VI-NEXT: s_lshr_b32 s10, s10, 16 
3346; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240 3347; GCN-NOHSA-VI-NEXT: s_lshr_b32 s9, s9, 16 3348; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s69 3349; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s12 3350; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s68 3351; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s13 3352; GCN-NOHSA-VI-NEXT: s_lshr_b32 s8, s8, 16 3353; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224 3354; GCN-NOHSA-VI-NEXT: s_nop 0 3355; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s67 3356; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s10 3357; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s66 3358; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s11 3359; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208 3360; GCN-NOHSA-VI-NEXT: s_nop 0 3361; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s65 3362; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s8 3363; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s64 3364; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s9 3365; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192 3366; GCN-NOHSA-VI-NEXT: s_nop 0 3367; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 3368; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s63 3369; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7 3370; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s62 3371; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176 3372; GCN-NOHSA-VI-NEXT: s_nop 0 3373; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 3374; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s61 3375; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 3376; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s60 3377; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160 3378; GCN-NOHSA-VI-NEXT: s_nop 0 3379; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s59 3380; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s58 3381; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s57 3382; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s56 3383; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144 3384; GCN-NOHSA-VI-NEXT: s_nop 0 3385; GCN-NOHSA-VI-NEXT: 
v_mov_b32_e32 v0, s55 3386; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s54 3387; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s53 3388; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s52 3389; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128 3390; GCN-NOHSA-VI-NEXT: s_nop 0 3391; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s30 3392; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s51 3393; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s31 3394; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s50 3395; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 3396; GCN-NOHSA-VI-NEXT: s_nop 0 3397; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s28 3398; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s49 3399; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s29 3400; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s48 3401; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 3402; GCN-NOHSA-VI-NEXT: s_nop 0 3403; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s26 3404; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s47 3405; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s27 3406; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s46 3407; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 3408; GCN-NOHSA-VI-NEXT: s_nop 0 3409; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s24 3410; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s45 3411; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s25 3412; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s44 3413; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 3414; GCN-NOHSA-VI-NEXT: s_nop 0 3415; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s22 3416; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s43 3417; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s23 3418; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s42 3419; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 3420; GCN-NOHSA-VI-NEXT: s_nop 0 3421; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s20 3422; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s41 3423; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s21 3424; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s39 3425; GCN-NOHSA-VI-NEXT: 
buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 3426; GCN-NOHSA-VI-NEXT: s_nop 0 3427; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s18 3428; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s38 3429; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s19 3430; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s35 3431; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 3432; GCN-NOHSA-VI-NEXT: s_nop 0 3433; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s16 3434; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s34 3435; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s17 3436; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s33 3437; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 3438; GCN-NOHSA-VI-NEXT: s_endpgm 3439; 3440; EG-LABEL: constant_zextload_v64i16_to_v64i32: 3441; EG: ; %bb.0: 3442; EG-NEXT: ALU 0, @38, KC0[CB0:0-32], KC1[] 3443; EG-NEXT: TEX 3 @22 3444; EG-NEXT: ALU 55, @39, KC0[CB0:0-32], KC1[] 3445; EG-NEXT: TEX 3 @30 3446; EG-NEXT: ALU 87, @95, KC0[CB0:0-32], KC1[] 3447; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T65.XYZW, T66.X, 0 3448; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T63.XYZW, T49.X, 0 3449; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T62.XYZW, T64.X, 0 3450; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T60.XYZW, T50.X, 0 3451; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T59.XYZW, T61.X, 0 3452; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T57.XYZW, T51.X, 0 3453; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T56.XYZW, T58.X, 0 3454; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T54.XYZW, T52.X, 0 3455; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T53.XYZW, T55.X, 0 3456; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T39.X, 0 3457; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T48.X, 0 3458; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T40.X, 0 3459; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T46.X, 0 3460; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T41.X, 0 3461; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T43.X, 0 3462; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T36.X, 1 3463; EG-NEXT: CF_END 3464; EG-NEXT: Fetch clause 
starting at 22: 3465; EG-NEXT: VTX_READ_128 T36.XYZW, T35.X, 0, #1 3466; EG-NEXT: VTX_READ_128 T39.XYZW, T35.X, 48, #1 3467; EG-NEXT: VTX_READ_128 T40.XYZW, T35.X, 32, #1 3468; EG-NEXT: VTX_READ_128 T41.XYZW, T35.X, 16, #1 3469; EG-NEXT: Fetch clause starting at 30: 3470; EG-NEXT: VTX_READ_128 T49.XYZW, T35.X, 112, #1 3471; EG-NEXT: VTX_READ_128 T50.XYZW, T35.X, 96, #1 3472; EG-NEXT: VTX_READ_128 T51.XYZW, T35.X, 80, #1 3473; EG-NEXT: VTX_READ_128 T52.XYZW, T35.X, 64, #1 3474; EG-NEXT: ALU clause starting at 38: 3475; EG-NEXT: MOV * T35.X, KC0[2].Z, 3476; EG-NEXT: ALU clause starting at 39: 3477; EG-NEXT: LSHR * T37.W, T36.Y, literal.x, 3478; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3479; EG-NEXT: AND_INT * T37.Z, T36.Y, literal.x, 3480; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3481; EG-NEXT: LSHR T37.Y, T36.X, literal.x, 3482; EG-NEXT: LSHR * T38.W, T36.W, literal.x, 3483; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3484; EG-NEXT: AND_INT T37.X, T36.X, literal.x, 3485; EG-NEXT: AND_INT T38.Z, T36.W, literal.x, 3486; EG-NEXT: LSHR * T36.X, KC0[2].Y, literal.y, 3487; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 3488; EG-NEXT: LSHR T38.Y, T36.Z, literal.x, 3489; EG-NEXT: LSHR * T42.W, T41.Y, literal.x, 3490; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3491; EG-NEXT: AND_INT T38.X, T36.Z, literal.x, 3492; EG-NEXT: AND_INT T42.Z, T41.Y, literal.x, 3493; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3494; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 3495; EG-NEXT: LSHR T43.X, PV.W, literal.x, 3496; EG-NEXT: LSHR T42.Y, T41.X, literal.y, 3497; EG-NEXT: LSHR T44.W, T41.W, literal.y, 3498; EG-NEXT: AND_INT * T42.X, T41.X, literal.z, 3499; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3500; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3501; EG-NEXT: AND_INT T44.Z, T41.W, literal.x, 3502; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3503; EG-NEXT: 65535(9.183409e-41), 32(4.484155e-44) 3504; EG-NEXT: LSHR T41.X, PV.W, literal.x, 3505; EG-NEXT: LSHR T44.Y, T41.Z, 
literal.y, 3506; EG-NEXT: LSHR T45.W, T40.Y, literal.y, 3507; EG-NEXT: AND_INT * T44.X, T41.Z, literal.z, 3508; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3509; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3510; EG-NEXT: AND_INT T45.Z, T40.Y, literal.x, 3511; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3512; EG-NEXT: 65535(9.183409e-41), 48(6.726233e-44) 3513; EG-NEXT: LSHR T46.X, PV.W, literal.x, 3514; EG-NEXT: LSHR T45.Y, T40.X, literal.y, 3515; EG-NEXT: LSHR T47.W, T40.W, literal.y, 3516; EG-NEXT: AND_INT * T45.X, T40.X, literal.z, 3517; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3518; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3519; EG-NEXT: AND_INT T47.Z, T40.W, literal.x, 3520; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3521; EG-NEXT: 65535(9.183409e-41), 64(8.968310e-44) 3522; EG-NEXT: LSHR T40.X, PV.W, literal.x, 3523; EG-NEXT: LSHR T47.Y, T40.Z, literal.y, 3524; EG-NEXT: AND_INT * T47.X, T40.Z, literal.z, 3525; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3526; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3527; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.x, 3528; EG-NEXT: LSHR * T35.W, T39.Y, literal.y, 3529; EG-NEXT: 80(1.121039e-43), 16(2.242078e-44) 3530; EG-NEXT: LSHR T48.X, PV.W, literal.x, 3531; EG-NEXT: AND_INT * T35.Z, T39.Y, literal.y, 3532; EG-NEXT: 2(2.802597e-45), 65535(9.183409e-41) 3533; EG-NEXT: ALU clause starting at 95: 3534; EG-NEXT: LSHR T35.Y, T39.X, literal.x, 3535; EG-NEXT: LSHR * T53.W, T39.W, literal.x, 3536; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3537; EG-NEXT: AND_INT T35.X, T39.X, literal.x, 3538; EG-NEXT: AND_INT T53.Z, T39.W, literal.x, 3539; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3540; EG-NEXT: 65535(9.183409e-41), 96(1.345247e-43) 3541; EG-NEXT: LSHR T39.X, PV.W, literal.x, 3542; EG-NEXT: LSHR T53.Y, T39.Z, literal.y, 3543; EG-NEXT: LSHR T54.W, T52.Y, literal.y, 3544; EG-NEXT: AND_INT * T53.X, T39.Z, literal.z, 3545; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3546; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 
3547; EG-NEXT: AND_INT T54.Z, T52.Y, literal.x, 3548; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3549; EG-NEXT: 65535(9.183409e-41), 112(1.569454e-43) 3550; EG-NEXT: LSHR T55.X, PV.W, literal.x, 3551; EG-NEXT: LSHR T54.Y, T52.X, literal.y, 3552; EG-NEXT: LSHR T56.W, T52.W, literal.y, 3553; EG-NEXT: AND_INT * T54.X, T52.X, literal.z, 3554; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3555; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3556; EG-NEXT: AND_INT T56.Z, T52.W, literal.x, 3557; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3558; EG-NEXT: 65535(9.183409e-41), 128(1.793662e-43) 3559; EG-NEXT: LSHR T52.X, PV.W, literal.x, 3560; EG-NEXT: LSHR T56.Y, T52.Z, literal.y, 3561; EG-NEXT: LSHR T57.W, T51.Y, literal.y, 3562; EG-NEXT: AND_INT * T56.X, T52.Z, literal.z, 3563; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3564; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3565; EG-NEXT: AND_INT T57.Z, T51.Y, literal.x, 3566; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3567; EG-NEXT: 65535(9.183409e-41), 144(2.017870e-43) 3568; EG-NEXT: LSHR T58.X, PV.W, literal.x, 3569; EG-NEXT: LSHR T57.Y, T51.X, literal.y, 3570; EG-NEXT: LSHR T59.W, T51.W, literal.y, 3571; EG-NEXT: AND_INT * T57.X, T51.X, literal.z, 3572; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3573; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3574; EG-NEXT: AND_INT T59.Z, T51.W, literal.x, 3575; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3576; EG-NEXT: 65535(9.183409e-41), 160(2.242078e-43) 3577; EG-NEXT: LSHR T51.X, PV.W, literal.x, 3578; EG-NEXT: LSHR T59.Y, T51.Z, literal.y, 3579; EG-NEXT: LSHR T60.W, T50.Y, literal.y, 3580; EG-NEXT: AND_INT * T59.X, T51.Z, literal.z, 3581; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3582; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3583; EG-NEXT: AND_INT T60.Z, T50.Y, literal.x, 3584; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3585; EG-NEXT: 65535(9.183409e-41), 176(2.466285e-43) 3586; EG-NEXT: LSHR T61.X, PV.W, literal.x, 3587; EG-NEXT: LSHR T60.Y, T50.X, literal.y, 
3588; EG-NEXT: LSHR T62.W, T50.W, literal.y, 3589; EG-NEXT: AND_INT * T60.X, T50.X, literal.z, 3590; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3591; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3592; EG-NEXT: AND_INT T62.Z, T50.W, literal.x, 3593; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3594; EG-NEXT: 65535(9.183409e-41), 192(2.690493e-43) 3595; EG-NEXT: LSHR T50.X, PV.W, literal.x, 3596; EG-NEXT: LSHR T62.Y, T50.Z, literal.y, 3597; EG-NEXT: LSHR T63.W, T49.Y, literal.y, 3598; EG-NEXT: AND_INT * T62.X, T50.Z, literal.z, 3599; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3600; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3601; EG-NEXT: AND_INT T63.Z, T49.Y, literal.x, 3602; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3603; EG-NEXT: 65535(9.183409e-41), 208(2.914701e-43) 3604; EG-NEXT: LSHR T64.X, PV.W, literal.x, 3605; EG-NEXT: LSHR T63.Y, T49.X, literal.y, 3606; EG-NEXT: LSHR T65.W, T49.W, literal.y, 3607; EG-NEXT: AND_INT * T63.X, T49.X, literal.z, 3608; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3609; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3610; EG-NEXT: AND_INT T65.Z, T49.W, literal.x, 3611; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3612; EG-NEXT: 65535(9.183409e-41), 224(3.138909e-43) 3613; EG-NEXT: LSHR T49.X, PV.W, literal.x, 3614; EG-NEXT: LSHR T65.Y, T49.Z, literal.y, 3615; EG-NEXT: AND_INT * T65.X, T49.Z, literal.z, 3616; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3617; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3618; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 3619; EG-NEXT: 240(3.363116e-43), 0(0.000000e+00) 3620; EG-NEXT: LSHR * T66.X, PV.W, literal.x, 3621; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 3622 %load = load <64 x i16>, <64 x i16> addrspace(4)* %in 3623 %ext = zext <64 x i16> %load to <64 x i32> 3624 store <64 x i32> %ext, <64 x i32> addrspace(1)* %out 3625 ret void 3626} 3627 3628define amdgpu_kernel void @constant_sextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 { 3629; 
GCN-NOHSA-SI-LABEL: constant_sextload_v64i16_to_v64i32: 3630; GCN-NOHSA-SI: ; %bb.0: 3631; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[36:39], s[0:1], 0x9 3632; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 3633; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[0:15], s[38:39], 0x0 3634; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[16:31], s[38:39], 0x10 3635; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 3636; GCN-NOHSA-SI-NEXT: s_ashr_i32 s33, s1, 16 3637; GCN-NOHSA-SI-NEXT: s_ashr_i32 s34, s0, 16 3638; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s35, s1 3639; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s38, s0 3640; GCN-NOHSA-SI-NEXT: s_ashr_i32 s39, s3, 16 3641; GCN-NOHSA-SI-NEXT: s_ashr_i32 s40, s2, 16 3642; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s41, s3 3643; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s42, s2 3644; GCN-NOHSA-SI-NEXT: s_ashr_i32 s43, s5, 16 3645; GCN-NOHSA-SI-NEXT: s_ashr_i32 s44, s4, 16 3646; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s5, s5 3647; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s4, s4 3648; GCN-NOHSA-SI-NEXT: s_ashr_i32 s45, s7, 16 3649; GCN-NOHSA-SI-NEXT: s_ashr_i32 s46, s6, 16 3650; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s7, s7 3651; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s6, s6 3652; GCN-NOHSA-SI-NEXT: s_ashr_i32 s47, s9, 16 3653; GCN-NOHSA-SI-NEXT: s_ashr_i32 s48, s8, 16 3654; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s9, s9 3655; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s8, s8 3656; GCN-NOHSA-SI-NEXT: s_ashr_i32 s49, s11, 16 3657; GCN-NOHSA-SI-NEXT: s_ashr_i32 s50, s10, 16 3658; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s11, s11 3659; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s10, s10 3660; GCN-NOHSA-SI-NEXT: s_ashr_i32 s51, s13, 16 3661; GCN-NOHSA-SI-NEXT: s_ashr_i32 s52, s12, 16 3662; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s13, s13 3663; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s12, s12 3664; GCN-NOHSA-SI-NEXT: s_ashr_i32 s53, s15, 16 3665; GCN-NOHSA-SI-NEXT: s_ashr_i32 s54, s14, 16 3666; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s15, s15 3667; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s14, s14 3668; GCN-NOHSA-SI-NEXT: s_ashr_i32 s55, s17, 16 3669; GCN-NOHSA-SI-NEXT: s_ashr_i32 
s56, s16, 16 3670; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s17, s17 3671; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s16, s16 3672; GCN-NOHSA-SI-NEXT: s_ashr_i32 s57, s19, 16 3673; GCN-NOHSA-SI-NEXT: s_ashr_i32 s58, s18, 16 3674; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s19, s19 3675; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s18, s18 3676; GCN-NOHSA-SI-NEXT: s_ashr_i32 s59, s21, 16 3677; GCN-NOHSA-SI-NEXT: s_ashr_i32 s60, s20, 16 3678; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s21, s21 3679; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s20, s20 3680; GCN-NOHSA-SI-NEXT: s_ashr_i32 s61, s22, 16 3681; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s62, s23 3682; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s22, s22 3683; GCN-NOHSA-SI-NEXT: s_ashr_i32 s63, s25, 16 3684; GCN-NOHSA-SI-NEXT: s_ashr_i32 s64, s24, 16 3685; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s25, s25 3686; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s24, s24 3687; GCN-NOHSA-SI-NEXT: s_ashr_i32 s65, s27, 16 3688; GCN-NOHSA-SI-NEXT: s_ashr_i32 s66, s26, 16 3689; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s27, s27 3690; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s26, s26 3691; GCN-NOHSA-SI-NEXT: s_ashr_i32 s67, s29, 16 3692; GCN-NOHSA-SI-NEXT: s_ashr_i32 s68, s28, 16 3693; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s29, s29 3694; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s28, s28 3695; GCN-NOHSA-SI-NEXT: s_ashr_i32 s69, s31, 16 3696; GCN-NOHSA-SI-NEXT: s_ashr_i32 s70, s30, 16 3697; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s31, s31 3698; GCN-NOHSA-SI-NEXT: s_sext_i32_i16 s30, s30 3699; GCN-NOHSA-SI-NEXT: s_ashr_i32 s23, s23, 16 3700; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s36 3701; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s37 3702; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 3703; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 3704; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s30 3705; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s70 3706; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s31 3707; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s69 3708; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s28 3709; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s68 3710; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s29 3711; 
GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s67 3712; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s26 3713; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s66 3714; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s27 3715; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s65 3716; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s24 3717; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s64 3718; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s25 3719; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s63 3720; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v16, s22 3721; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, s61 3722; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v18, s62 3723; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v20, s20 3724; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v19, s23 3725; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v21, s60 3726; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v22, s21 3727; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v23, s59 3728; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240 3729; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:224 3730; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208 3731; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:192 3732; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:176 3733; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:160 3734; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(5) 3735; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s18 3736; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s58 3737; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s19 3738; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s57 3739; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144 3740; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3741; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s16 3742; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s56 3743; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s17 3744; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s55 3745; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128 3746; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3747; GCN-NOHSA-SI-NEXT: 
v_mov_b32_e32 v0, s14 3748; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s54 3749; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s15 3750; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s53 3751; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 3752; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3753; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s12 3754; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s52 3755; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s13 3756; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s51 3757; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 3758; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3759; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10 3760; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s50 3761; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s11 3762; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s49 3763; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 3764; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3765; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s8 3766; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s48 3767; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s9 3768; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s47 3769; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 3770; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3771; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 3772; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s46 3773; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7 3774; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s45 3775; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 3776; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3777; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 3778; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s44 3779; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 3780; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s43 3781; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 3782; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3783; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s42 3784; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s40 3785; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s41 3786; 
GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s39 3787; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 3788; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3789; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s38 3790; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s34 3791; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s35 3792; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s33 3793; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 3794; GCN-NOHSA-SI-NEXT: s_endpgm 3795; 3796; GCN-HSA-LABEL: constant_sextload_v64i16_to_v64i32: 3797; GCN-HSA: ; %bb.0: 3798; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3799; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 3800; GCN-HSA-NEXT: s_load_dwordx16 s[16:31], s[2:3], 0x0 3801; GCN-HSA-NEXT: s_load_dwordx16 s[36:51], s[2:3], 0x10 3802; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 3803; GCN-HSA-NEXT: s_ashr_i32 s4, s17, 16 3804; GCN-HSA-NEXT: s_ashr_i32 s5, s16, 16 3805; GCN-HSA-NEXT: s_sext_i32_i16 s6, s17 3806; GCN-HSA-NEXT: s_sext_i32_i16 s7, s16 3807; GCN-HSA-NEXT: s_ashr_i32 s8, s19, 16 3808; GCN-HSA-NEXT: s_ashr_i32 s9, s18, 16 3809; GCN-HSA-NEXT: s_sext_i32_i16 s10, s19 3810; GCN-HSA-NEXT: s_sext_i32_i16 s11, s18 3811; GCN-HSA-NEXT: s_ashr_i32 s12, s21, 16 3812; GCN-HSA-NEXT: s_ashr_i32 s13, s20, 16 3813; GCN-HSA-NEXT: s_sext_i32_i16 s14, s21 3814; GCN-HSA-NEXT: s_sext_i32_i16 s15, s20 3815; GCN-HSA-NEXT: s_ashr_i32 s16, s23, 16 3816; GCN-HSA-NEXT: s_ashr_i32 s17, s22, 16 3817; GCN-HSA-NEXT: s_sext_i32_i16 s18, s23 3818; GCN-HSA-NEXT: s_sext_i32_i16 s19, s22 3819; GCN-HSA-NEXT: s_ashr_i32 s20, s25, 16 3820; GCN-HSA-NEXT: s_ashr_i32 s21, s24, 16 3821; GCN-HSA-NEXT: s_sext_i32_i16 s22, s25 3822; GCN-HSA-NEXT: s_sext_i32_i16 s23, s24 3823; GCN-HSA-NEXT: s_ashr_i32 s24, s27, 16 3824; GCN-HSA-NEXT: s_ashr_i32 s25, s26, 16 3825; GCN-HSA-NEXT: s_ashr_i32 s33, s29, 16 3826; GCN-HSA-NEXT: s_ashr_i32 s34, s28, 16 3827; GCN-HSA-NEXT: s_ashr_i32 s35, s31, 16 3828; GCN-HSA-NEXT: s_ashr_i32 s52, s30, 16 3829; GCN-HSA-NEXT: s_ashr_i32 s53, s37, 16 3830; GCN-HSA-NEXT: 
s_ashr_i32 s54, s36, 16 3831; GCN-HSA-NEXT: s_ashr_i32 s55, s39, 16 3832; GCN-HSA-NEXT: s_ashr_i32 s56, s38, 16 3833; GCN-HSA-NEXT: s_ashr_i32 s57, s41, 16 3834; GCN-HSA-NEXT: s_ashr_i32 s58, s40, 16 3835; GCN-HSA-NEXT: s_ashr_i32 s59, s43, 16 3836; GCN-HSA-NEXT: s_ashr_i32 s60, s42, 16 3837; GCN-HSA-NEXT: s_ashr_i32 s61, s45, 16 3838; GCN-HSA-NEXT: s_ashr_i32 s62, s44, 16 3839; GCN-HSA-NEXT: s_ashr_i32 s63, s47, 16 3840; GCN-HSA-NEXT: s_ashr_i32 s64, s46, 16 3841; GCN-HSA-NEXT: s_ashr_i32 s65, s49, 16 3842; GCN-HSA-NEXT: s_ashr_i32 s66, s48, 16 3843; GCN-HSA-NEXT: s_ashr_i32 s67, s51, 16 3844; GCN-HSA-NEXT: s_ashr_i32 s68, s50, 16 3845; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xf0 3846; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3847; GCN-HSA-NEXT: v_mov_b32_e32 v22, s3 3848; GCN-HSA-NEXT: v_mov_b32_e32 v21, s2 3849; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xe0 3850; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3851; GCN-HSA-NEXT: v_mov_b32_e32 v25, s3 3852; GCN-HSA-NEXT: v_mov_b32_e32 v24, s2 3853; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xd0 3854; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3855; GCN-HSA-NEXT: v_mov_b32_e32 v27, s3 3856; GCN-HSA-NEXT: v_mov_b32_e32 v26, s2 3857; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xc0 3858; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3859; GCN-HSA-NEXT: v_mov_b32_e32 v29, s3 3860; GCN-HSA-NEXT: v_mov_b32_e32 v28, s2 3861; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xb0 3862; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3863; GCN-HSA-NEXT: v_mov_b32_e32 v31, s3 3864; GCN-HSA-NEXT: v_mov_b32_e32 v30, s2 3865; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xa0 3866; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3867; GCN-HSA-NEXT: v_mov_b32_e32 v33, s3 3868; GCN-HSA-NEXT: s_sext_i32_i16 s49, s49 3869; GCN-HSA-NEXT: s_sext_i32_i16 s48, s48 3870; GCN-HSA-NEXT: v_mov_b32_e32 v32, s2 3871; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x90 3872; GCN-HSA-NEXT: v_mov_b32_e32 v4, s48 3873; GCN-HSA-NEXT: v_mov_b32_e32 v5, s66 3874; GCN-HSA-NEXT: v_mov_b32_e32 v6, s49 3875; GCN-HSA-NEXT: v_mov_b32_e32 v7, s65 3876; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3877; 
GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[4:7] 3878; GCN-HSA-NEXT: v_mov_b32_e32 v25, s3 3879; GCN-HSA-NEXT: v_mov_b32_e32 v24, s2 3880; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x80 3881; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3882; GCN-HSA-NEXT: v_mov_b32_e32 v35, s3 3883; GCN-HSA-NEXT: s_sext_i32_i16 s43, s43 3884; GCN-HSA-NEXT: s_sext_i32_i16 s42, s42 3885; GCN-HSA-NEXT: v_mov_b32_e32 v34, s2 3886; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70 3887; GCN-HSA-NEXT: v_mov_b32_e32 v16, s42 3888; GCN-HSA-NEXT: v_mov_b32_e32 v17, s60 3889; GCN-HSA-NEXT: v_mov_b32_e32 v18, s43 3890; GCN-HSA-NEXT: v_mov_b32_e32 v19, s59 3891; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3892; GCN-HSA-NEXT: flat_store_dwordx4 v[30:31], v[16:19] 3893; GCN-HSA-NEXT: s_sext_i32_i16 s51, s51 3894; GCN-HSA-NEXT: v_mov_b32_e32 v17, s3 3895; GCN-HSA-NEXT: v_mov_b32_e32 v16, s2 3896; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60 3897; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3898; GCN-HSA-NEXT: s_sext_i32_i16 s50, s50 3899; GCN-HSA-NEXT: v_mov_b32_e32 v19, s3 3900; GCN-HSA-NEXT: s_sext_i32_i16 s36, s36 3901; GCN-HSA-NEXT: s_sext_i32_i16 s39, s39 3902; GCN-HSA-NEXT: s_sext_i32_i16 s38, s38 3903; GCN-HSA-NEXT: s_sext_i32_i16 s41, s41 3904; GCN-HSA-NEXT: s_sext_i32_i16 s40, s40 3905; GCN-HSA-NEXT: s_sext_i32_i16 s45, s45 3906; GCN-HSA-NEXT: s_sext_i32_i16 s44, s44 3907; GCN-HSA-NEXT: s_sext_i32_i16 s47, s47 3908; GCN-HSA-NEXT: s_sext_i32_i16 s46, s46 3909; GCN-HSA-NEXT: v_mov_b32_e32 v0, s50 3910; GCN-HSA-NEXT: v_mov_b32_e32 v1, s68 3911; GCN-HSA-NEXT: v_mov_b32_e32 v2, s51 3912; GCN-HSA-NEXT: v_mov_b32_e32 v3, s67 3913; GCN-HSA-NEXT: v_mov_b32_e32 v18, s2 3914; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x50 3915; GCN-HSA-NEXT: s_sext_i32_i16 s29, s29 3916; GCN-HSA-NEXT: s_sext_i32_i16 s28, s28 3917; GCN-HSA-NEXT: s_sext_i32_i16 s31, s31 3918; GCN-HSA-NEXT: s_sext_i32_i16 s30, s30 3919; GCN-HSA-NEXT: s_sext_i32_i16 s37, s37 3920; GCN-HSA-NEXT: v_mov_b32_e32 v8, s46 3921; GCN-HSA-NEXT: v_mov_b32_e32 v9, s64 3922; GCN-HSA-NEXT: v_mov_b32_e32 v10, s47 
3923; GCN-HSA-NEXT: v_mov_b32_e32 v11, s63 3924; GCN-HSA-NEXT: v_mov_b32_e32 v12, s44 3925; GCN-HSA-NEXT: v_mov_b32_e32 v13, s62 3926; GCN-HSA-NEXT: v_mov_b32_e32 v14, s45 3927; GCN-HSA-NEXT: v_mov_b32_e32 v15, s61 3928; GCN-HSA-NEXT: v_mov_b32_e32 v20, s40 3929; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[0:3] 3930; GCN-HSA-NEXT: v_mov_b32_e32 v21, s58 3931; GCN-HSA-NEXT: v_mov_b32_e32 v0, s38 3932; GCN-HSA-NEXT: v_mov_b32_e32 v22, s41 3933; GCN-HSA-NEXT: v_mov_b32_e32 v23, s57 3934; GCN-HSA-NEXT: v_mov_b32_e32 v1, s56 3935; GCN-HSA-NEXT: v_mov_b32_e32 v2, s39 3936; GCN-HSA-NEXT: v_mov_b32_e32 v4, s36 3937; GCN-HSA-NEXT: v_mov_b32_e32 v3, s55 3938; GCN-HSA-NEXT: v_mov_b32_e32 v5, s54 3939; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3940; GCN-HSA-NEXT: flat_store_dwordx4 v[26:27], v[8:11] 3941; GCN-HSA-NEXT: v_mov_b32_e32 v6, s37 3942; GCN-HSA-NEXT: v_mov_b32_e32 v8, s30 3943; GCN-HSA-NEXT: flat_store_dwordx4 v[28:29], v[12:15] 3944; GCN-HSA-NEXT: v_mov_b32_e32 v7, s53 3945; GCN-HSA-NEXT: v_mov_b32_e32 v12, s28 3946; GCN-HSA-NEXT: v_mov_b32_e32 v9, s52 3947; GCN-HSA-NEXT: v_mov_b32_e32 v10, s31 3948; GCN-HSA-NEXT: v_mov_b32_e32 v11, s35 3949; GCN-HSA-NEXT: v_mov_b32_e32 v13, s34 3950; GCN-HSA-NEXT: flat_store_dwordx4 v[32:33], v[20:23] 3951; GCN-HSA-NEXT: v_mov_b32_e32 v14, s29 3952; GCN-HSA-NEXT: v_mov_b32_e32 v15, s33 3953; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[0:3] 3954; GCN-HSA-NEXT: flat_store_dwordx4 v[34:35], v[4:7] 3955; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[8:11] 3956; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[12:15] 3957; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 3958; GCN-HSA-NEXT: s_sext_i32_i16 s27, s27 3959; GCN-HSA-NEXT: s_sext_i32_i16 s26, s26 3960; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 3961; GCN-HSA-NEXT: s_add_u32 s2, s0, 64 3962; GCN-HSA-NEXT: v_mov_b32_e32 v0, s26 3963; GCN-HSA-NEXT: v_mov_b32_e32 v1, s25 3964; GCN-HSA-NEXT: v_mov_b32_e32 v2, s27 3965; GCN-HSA-NEXT: v_mov_b32_e32 v3, s24 3966; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3967; 
GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3968; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 3969; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 3970; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 3971; GCN-HSA-NEXT: v_mov_b32_e32 v0, s23 3972; GCN-HSA-NEXT: v_mov_b32_e32 v1, s21 3973; GCN-HSA-NEXT: v_mov_b32_e32 v2, s22 3974; GCN-HSA-NEXT: v_mov_b32_e32 v3, s20 3975; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3976; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3977; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 3978; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 3979; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 3980; GCN-HSA-NEXT: v_mov_b32_e32 v0, s19 3981; GCN-HSA-NEXT: v_mov_b32_e32 v1, s17 3982; GCN-HSA-NEXT: v_mov_b32_e32 v2, s18 3983; GCN-HSA-NEXT: v_mov_b32_e32 v3, s16 3984; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3985; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3986; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 3987; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 3988; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 3989; GCN-HSA-NEXT: v_mov_b32_e32 v0, s15 3990; GCN-HSA-NEXT: v_mov_b32_e32 v1, s13 3991; GCN-HSA-NEXT: v_mov_b32_e32 v2, s14 3992; GCN-HSA-NEXT: v_mov_b32_e32 v3, s12 3993; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3994; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3995; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 3996; GCN-HSA-NEXT: v_mov_b32_e32 v0, s11 3997; GCN-HSA-NEXT: v_mov_b32_e32 v1, s9 3998; GCN-HSA-NEXT: v_mov_b32_e32 v2, s10 3999; GCN-HSA-NEXT: v_mov_b32_e32 v3, s8 4000; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 4001; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4002; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 4003; GCN-HSA-NEXT: v_mov_b32_e32 v0, s7 4004; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 4005; GCN-HSA-NEXT: v_mov_b32_e32 v2, s6 4006; GCN-HSA-NEXT: v_mov_b32_e32 v3, s4 4007; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 4008; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4009; GCN-HSA-NEXT: s_endpgm 4010; 4011; GCN-NOHSA-VI-LABEL: constant_sextload_v64i16_to_v64i32: 4012; GCN-NOHSA-VI: ; %bb.0: 4013; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[36:39], s[0:1], 0x24 
4014; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4015; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[16:31], s[38:39], 0x0 4016; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[0:15], s[38:39], 0x40 4017; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4018; GCN-NOHSA-VI-NEXT: s_ashr_i32 s49, s31, 16 4019; GCN-NOHSA-VI-NEXT: s_ashr_i32 s69, s15, 16 4020; GCN-NOHSA-VI-NEXT: s_ashr_i32 s70, s14, 16 4021; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s15, s15 4022; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s14, s14 4023; GCN-NOHSA-VI-NEXT: s_ashr_i32 s51, s1, 16 4024; GCN-NOHSA-VI-NEXT: s_ashr_i32 s52, s0, 16 4025; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s53, s1 4026; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s54, s0 4027; GCN-NOHSA-VI-NEXT: s_ashr_i32 s55, s3, 16 4028; GCN-NOHSA-VI-NEXT: s_ashr_i32 s56, s2, 16 4029; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s57, s3 4030; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s58, s2 4031; GCN-NOHSA-VI-NEXT: s_ashr_i32 s67, s13, 16 4032; GCN-NOHSA-VI-NEXT: s_ashr_i32 s68, s12, 16 4033; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s13, s13 4034; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s12, s12 4035; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 4036; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 4037; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s36 4038; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s37 4039; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s14 4040; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s70 4041; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s15 4042; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s69 4043; GCN-NOHSA-VI-NEXT: s_ashr_i32 s65, s11, 16 4044; GCN-NOHSA-VI-NEXT: s_ashr_i32 s66, s10, 16 4045; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s11, s11 4046; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s10, s10 4047; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240 4048; GCN-NOHSA-VI-NEXT: s_ashr_i32 s63, s9, 16 4049; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s12 4050; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s68 4051; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s13 4052; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s67 4053; GCN-NOHSA-VI-NEXT: s_ashr_i32 s64, s8, 16 4054; GCN-NOHSA-VI-NEXT: 
s_sext_i32_i16 s9, s9 4055; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s8, s8 4056; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224 4057; GCN-NOHSA-VI-NEXT: s_ashr_i32 s61, s7, 16 4058; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10 4059; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s66 4060; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s11 4061; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s65 4062; GCN-NOHSA-VI-NEXT: s_ashr_i32 s62, s6, 16 4063; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s7, s7 4064; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s6, s6 4065; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208 4066; GCN-NOHSA-VI-NEXT: s_ashr_i32 s59, s5, 16 4067; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8 4068; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s64 4069; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s9 4070; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s63 4071; GCN-NOHSA-VI-NEXT: s_ashr_i32 s60, s4, 16 4072; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s5, s5 4073; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s4, s4 4074; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192 4075; GCN-NOHSA-VI-NEXT: s_ashr_i32 s50, s30, 16 4076; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 4077; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s62 4078; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7 4079; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s61 4080; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176 4081; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s31, s31 4082; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 4083; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s60 4084; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 4085; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s59 4086; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160 4087; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s30, s30 4088; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s58 4089; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s56 4090; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s57 4091; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s55 4092; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 
offset:144 4093; GCN-NOHSA-VI-NEXT: s_ashr_i32 s47, s29, 16 4094; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s54 4095; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s52 4096; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s53 4097; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s51 4098; GCN-NOHSA-VI-NEXT: s_ashr_i32 s48, s28, 16 4099; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s29, s29 4100; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s28, s28 4101; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128 4102; GCN-NOHSA-VI-NEXT: s_ashr_i32 s45, s27, 16 4103; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s30 4104; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s50 4105; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s31 4106; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s49 4107; GCN-NOHSA-VI-NEXT: s_ashr_i32 s46, s26, 16 4108; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s27, s27 4109; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s26, s26 4110; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 4111; GCN-NOHSA-VI-NEXT: s_ashr_i32 s43, s25, 16 4112; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s28 4113; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s48 4114; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s29 4115; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s47 4116; GCN-NOHSA-VI-NEXT: s_ashr_i32 s44, s24, 16 4117; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s25, s25 4118; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s24, s24 4119; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 4120; GCN-NOHSA-VI-NEXT: s_ashr_i32 s41, s23, 16 4121; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s26 4122; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s46 4123; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s27 4124; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s45 4125; GCN-NOHSA-VI-NEXT: s_ashr_i32 s42, s22, 16 4126; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s23, s23 4127; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s22, s22 4128; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 4129; GCN-NOHSA-VI-NEXT: s_ashr_i32 s39, s21, 16 4130; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s24 4131; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 
v1, s44 4132; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s25 4133; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s43 4134; GCN-NOHSA-VI-NEXT: s_ashr_i32 s40, s20, 16 4135; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s21, s21 4136; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s20, s20 4137; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 4138; GCN-NOHSA-VI-NEXT: s_ashr_i32 s35, s19, 16 4139; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s22 4140; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s42 4141; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s23 4142; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s41 4143; GCN-NOHSA-VI-NEXT: s_ashr_i32 s38, s18, 16 4144; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s19, s19 4145; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s18, s18 4146; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 4147; GCN-NOHSA-VI-NEXT: s_ashr_i32 s33, s17, 16 4148; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s20 4149; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s40 4150; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s21 4151; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s39 4152; GCN-NOHSA-VI-NEXT: s_ashr_i32 s34, s16, 16 4153; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s17, s17 4154; GCN-NOHSA-VI-NEXT: s_sext_i32_i16 s16, s16 4155; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 4156; GCN-NOHSA-VI-NEXT: s_nop 0 4157; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s18 4158; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s38 4159; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s19 4160; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s35 4161; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 4162; GCN-NOHSA-VI-NEXT: s_nop 0 4163; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s16 4164; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s34 4165; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s17 4166; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s33 4167; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 4168; GCN-NOHSA-VI-NEXT: s_endpgm 4169; 4170; EG-LABEL: constant_sextload_v64i16_to_v64i32: 4171; EG: ; %bb.0: 4172; EG-NEXT: ALU 17, @38, KC0[CB0:0-32], 
KC1[] 4173; EG-NEXT: TEX 7 @22 4174; EG-NEXT: ALU 75, @56, KC0[CB0:0-32], KC1[] 4175; EG-NEXT: ALU 71, @132, KC0[CB0:0-32], KC1[] 4176; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T66.XYZW, T48.X, 0 4177; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T41.X, 0 4178; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T65.XYZW, T56.X, 0 4179; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T55.X, 0 4180; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T64.XYZW, T54.X, 0 4181; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T53.X, 0 4182; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T63.XYZW, T52.X, 0 4183; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T51.X, 0 4184; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T62.XYZW, T50.X, 0 4185; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T49.X, 0 4186; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T61.XYZW, T40.X, 0 4187; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T39.X, 0 4188; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T60.XYZW, T38.X, 0 4189; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T59.XYZW, T37.X, 0 4190; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T58.XYZW, T36.X, 0 4191; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T57.XYZW, T35.X, 1 4192; EG-NEXT: CF_END 4193; EG-NEXT: PAD 4194; EG-NEXT: Fetch clause starting at 22: 4195; EG-NEXT: VTX_READ_128 T42.XYZW, T41.X, 16, #1 4196; EG-NEXT: VTX_READ_128 T43.XYZW, T41.X, 32, #1 4197; EG-NEXT: VTX_READ_128 T44.XYZW, T41.X, 0, #1 4198; EG-NEXT: VTX_READ_128 T45.XYZW, T41.X, 48, #1 4199; EG-NEXT: VTX_READ_128 T46.XYZW, T41.X, 64, #1 4200; EG-NEXT: VTX_READ_128 T47.XYZW, T41.X, 80, #1 4201; EG-NEXT: VTX_READ_128 T48.XYZW, T41.X, 96, #1 4202; EG-NEXT: VTX_READ_128 T41.XYZW, T41.X, 112, #1 4203; EG-NEXT: ALU clause starting at 38: 4204; EG-NEXT: LSHR T35.X, KC0[2].Y, literal.x, 4205; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4206; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4207; EG-NEXT: LSHR T36.X, PV.W, literal.x, 4208; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4209; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 4210; EG-NEXT: LSHR T37.X, PV.W, 
literal.x, 4211; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4212; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 4213; EG-NEXT: LSHR T38.X, PV.W, literal.x, 4214; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4215; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 4216; EG-NEXT: LSHR T39.X, PV.W, literal.x, 4217; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4218; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 4219; EG-NEXT: LSHR T40.X, PV.W, literal.x, 4220; EG-NEXT: MOV * T41.X, KC0[2].Z, 4221; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4222; EG-NEXT: ALU clause starting at 56: 4223; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 4224; EG-NEXT: 96(1.345247e-43), 0(0.000000e+00) 4225; EG-NEXT: LSHR T49.X, PV.W, literal.x, 4226; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4227; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) 4228; EG-NEXT: LSHR T50.X, PV.W, literal.x, 4229; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4230; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43) 4231; EG-NEXT: LSHR T51.X, PV.W, literal.x, 4232; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4233; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43) 4234; EG-NEXT: LSHR T52.X, PV.W, literal.x, 4235; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4236; EG-NEXT: 2(2.802597e-45), 160(2.242078e-43) 4237; EG-NEXT: LSHR T53.X, PV.W, literal.x, 4238; EG-NEXT: LSHR T0.Y, T41.W, literal.y, 4239; EG-NEXT: LSHR T0.Z, T41.Y, literal.y, 4240; EG-NEXT: LSHR T0.W, T48.W, literal.y, BS:VEC_120/SCL_212 4241; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 4242; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4243; EG-NEXT: 176(2.466285e-43), 0(0.000000e+00) 4244; EG-NEXT: LSHR T54.X, PS, literal.x, 4245; EG-NEXT: LSHR T1.Y, T48.Y, literal.y, 4246; EG-NEXT: LSHR T1.Z, T47.W, literal.y, 4247; EG-NEXT: LSHR T1.W, T47.Y, literal.y, BS:VEC_120/SCL_212 4248; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.z, 4249; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4250; EG-NEXT: 192(2.690493e-43), 0(0.000000e+00) 4251; EG-NEXT: LSHR T55.X, PS, literal.x, 4252; 
EG-NEXT: LSHR T2.Y, T46.W, literal.y, 4253; EG-NEXT: LSHR T2.Z, T46.Y, literal.y, 4254; EG-NEXT: LSHR T2.W, T45.W, literal.y, BS:VEC_120/SCL_212 4255; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.z, 4256; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4257; EG-NEXT: 208(2.914701e-43), 0(0.000000e+00) 4258; EG-NEXT: LSHR T56.X, PS, literal.x, 4259; EG-NEXT: LSHR T3.Y, T45.Y, literal.y, 4260; EG-NEXT: BFE_INT T57.Z, T44.Y, 0.0, literal.y, BS:VEC_120/SCL_212 4261; EG-NEXT: LSHR T3.W, T43.W, literal.y, 4262; EG-NEXT: LSHR * T4.W, T43.Y, literal.y, 4263; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4264; EG-NEXT: BFE_INT T57.X, T44.X, 0.0, literal.x, 4265; EG-NEXT: LSHR T4.Y, T42.W, literal.x, 4266; EG-NEXT: BFE_INT T58.Z, T44.W, 0.0, literal.x, BS:VEC_120/SCL_212 4267; EG-NEXT: LSHR T5.W, T42.Y, literal.x, 4268; EG-NEXT: LSHR * T6.W, T44.Y, literal.x, 4269; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4270; EG-NEXT: BFE_INT T58.X, T44.Z, 0.0, literal.x, 4271; EG-NEXT: LSHR T5.Y, T44.W, literal.x, 4272; EG-NEXT: BFE_INT T59.Z, T42.Y, 0.0, literal.x, 4273; EG-NEXT: BFE_INT T57.W, PS, 0.0, literal.x, 4274; EG-NEXT: LSHR * T6.W, T44.X, literal.x, 4275; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4276; EG-NEXT: BFE_INT T59.X, T42.X, 0.0, literal.x, 4277; EG-NEXT: BFE_INT T57.Y, PS, 0.0, literal.x, 4278; EG-NEXT: BFE_INT T60.Z, T42.W, 0.0, literal.x, 4279; EG-NEXT: BFE_INT T58.W, PV.Y, 0.0, literal.x, 4280; EG-NEXT: LSHR * T6.W, T44.Z, literal.x, 4281; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4282; EG-NEXT: BFE_INT T60.X, T42.Z, 0.0, literal.x, 4283; EG-NEXT: BFE_INT T58.Y, PS, 0.0, literal.x, 4284; EG-NEXT: BFE_INT T44.Z, T43.Y, 0.0, literal.x, 4285; EG-NEXT: BFE_INT T59.W, T5.W, 0.0, literal.x, 4286; EG-NEXT: LSHR * T5.W, T42.X, literal.x, 4287; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4288; EG-NEXT: BFE_INT T44.X, T43.X, 0.0, literal.x, 4289; EG-NEXT: BFE_INT T59.Y, PS, 0.0, literal.x, 4290; EG-NEXT: BFE_INT T61.Z, T43.W, 0.0, literal.x, 4291; EG-NEXT: BFE_INT T60.W, T4.Y, 0.0, 
literal.x, 4292; EG-NEXT: LSHR * T5.W, T42.Z, literal.x, 4293; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4294; EG-NEXT: BFE_INT T61.X, T43.Z, 0.0, literal.x, 4295; EG-NEXT: BFE_INT T60.Y, PS, 0.0, literal.x, 4296; EG-NEXT: BFE_INT T42.Z, T45.Y, 0.0, literal.x, 4297; EG-NEXT: BFE_INT * T44.W, T4.W, 0.0, literal.x, 4298; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4299; EG-NEXT: ALU clause starting at 132: 4300; EG-NEXT: LSHR * T4.W, T43.X, literal.x, 4301; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4302; EG-NEXT: BFE_INT T42.X, T45.X, 0.0, literal.x, 4303; EG-NEXT: BFE_INT T44.Y, PV.W, 0.0, literal.x, 4304; EG-NEXT: BFE_INT T62.Z, T45.W, 0.0, literal.x, 4305; EG-NEXT: BFE_INT T61.W, T3.W, 0.0, literal.x, BS:VEC_120/SCL_212 4306; EG-NEXT: LSHR * T3.W, T43.Z, literal.x, 4307; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4308; EG-NEXT: BFE_INT T62.X, T45.Z, 0.0, literal.x, 4309; EG-NEXT: BFE_INT T61.Y, PS, 0.0, literal.x, 4310; EG-NEXT: BFE_INT T43.Z, T46.Y, 0.0, literal.x, 4311; EG-NEXT: BFE_INT T42.W, T3.Y, 0.0, literal.x, BS:VEC_120/SCL_212 4312; EG-NEXT: LSHR * T3.W, T45.X, literal.x, 4313; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4314; EG-NEXT: BFE_INT T43.X, T46.X, 0.0, literal.x, 4315; EG-NEXT: BFE_INT T42.Y, PS, 0.0, literal.x, 4316; EG-NEXT: BFE_INT T63.Z, T46.W, 0.0, literal.x, 4317; EG-NEXT: BFE_INT T62.W, T2.W, 0.0, literal.x, BS:VEC_120/SCL_212 4318; EG-NEXT: LSHR * T2.W, T45.Z, literal.x, 4319; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4320; EG-NEXT: BFE_INT T63.X, T46.Z, 0.0, literal.x, 4321; EG-NEXT: BFE_INT T62.Y, PS, 0.0, literal.x, 4322; EG-NEXT: BFE_INT T45.Z, T47.Y, 0.0, literal.x, 4323; EG-NEXT: BFE_INT T43.W, T2.Z, 0.0, literal.x, BS:VEC_120/SCL_212 4324; EG-NEXT: LSHR * T2.W, T46.X, literal.x, 4325; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4326; EG-NEXT: BFE_INT T45.X, T47.X, 0.0, literal.x, 4327; EG-NEXT: BFE_INT T43.Y, PS, 0.0, literal.x, 4328; EG-NEXT: BFE_INT T64.Z, T47.W, 0.0, literal.x, 4329; EG-NEXT: BFE_INT T63.W, T2.Y, 0.0, 
literal.x, 4330; EG-NEXT: LSHR * T2.W, T46.Z, literal.x, 4331; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4332; EG-NEXT: BFE_INT T64.X, T47.Z, 0.0, literal.x, 4333; EG-NEXT: BFE_INT T63.Y, PS, 0.0, literal.x, 4334; EG-NEXT: BFE_INT T46.Z, T48.Y, 0.0, literal.x, 4335; EG-NEXT: BFE_INT T45.W, T1.W, 0.0, literal.x, 4336; EG-NEXT: LSHR * T1.W, T47.X, literal.x, 4337; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4338; EG-NEXT: BFE_INT T46.X, T48.X, 0.0, literal.x, 4339; EG-NEXT: BFE_INT T45.Y, PS, 0.0, literal.x, 4340; EG-NEXT: BFE_INT T65.Z, T48.W, 0.0, literal.x, 4341; EG-NEXT: BFE_INT T64.W, T1.Z, 0.0, literal.x, 4342; EG-NEXT: LSHR * T1.W, T47.Z, literal.x, 4343; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4344; EG-NEXT: BFE_INT T65.X, T48.Z, 0.0, literal.x, 4345; EG-NEXT: BFE_INT T64.Y, PS, 0.0, literal.x, 4346; EG-NEXT: BFE_INT T47.Z, T41.Y, 0.0, literal.x, 4347; EG-NEXT: BFE_INT T46.W, T1.Y, 0.0, literal.x, BS:VEC_120/SCL_212 4348; EG-NEXT: LSHR * T1.W, T48.X, literal.x, 4349; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4350; EG-NEXT: BFE_INT T47.X, T41.X, 0.0, literal.x, 4351; EG-NEXT: BFE_INT T46.Y, PS, 0.0, literal.x, 4352; EG-NEXT: BFE_INT T66.Z, T41.W, 0.0, literal.x, 4353; EG-NEXT: BFE_INT T65.W, T0.W, 0.0, literal.x, BS:VEC_120/SCL_212 4354; EG-NEXT: LSHR * T0.W, T48.Z, literal.x, 4355; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4356; EG-NEXT: BFE_INT T66.X, T41.Z, 0.0, literal.x, 4357; EG-NEXT: BFE_INT T65.Y, PS, 0.0, literal.x, 4358; EG-NEXT: LSHR T1.Z, T41.X, literal.x, 4359; EG-NEXT: BFE_INT T47.W, T0.Z, 0.0, literal.x, BS:VEC_120/SCL_212 4360; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4361; EG-NEXT: 16(2.242078e-44), 224(3.138909e-43) 4362; EG-NEXT: LSHR T41.X, PS, literal.x, 4363; EG-NEXT: BFE_INT T47.Y, PV.Z, 0.0, literal.y, 4364; EG-NEXT: LSHR T0.Z, T41.Z, literal.y, 4365; EG-NEXT: BFE_INT T66.W, T0.Y, 0.0, literal.y, 4366; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 4367; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4368; EG-NEXT: 
240(3.363116e-43), 0(0.000000e+00) 4369; EG-NEXT: LSHR T48.X, PS, literal.x, 4370; EG-NEXT: BFE_INT * T66.Y, PV.Z, 0.0, literal.y, 4371; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4372 %load = load <64 x i16>, <64 x i16> addrspace(4)* %in 4373 %ext = sext <64 x i16> %load to <64 x i32> 4374 store <64 x i32> %ext, <64 x i32> addrspace(1)* %out 4375 ret void 4376} 4377 ; constant_zextload_i16_to_i64 loads one i16 through the addrspace(4) pointer %in, zero-extends it to i64 and stores the result through the addrspace(1) pointer %out; the per-target check lines inside the function are autogenerated by utils/update_llc_test_checks.py (see the NOTE at the top of the file), so regenerate them instead of editing by hand. 4378define amdgpu_kernel void @constant_zextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(4)* %in) #0 { 4379; GCN-NOHSA-SI-LABEL: constant_zextload_i16_to_i64: 4380; GCN-NOHSA-SI: ; %bb.0: 4381; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 4382; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 4383; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 4384; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 4385; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 4386; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 4387; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 4388; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 4389; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 4390; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 4391; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 4392; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0 4393; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 4394; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 4395; GCN-NOHSA-SI-NEXT: s_endpgm 4396; 4397; GCN-HSA-LABEL: constant_zextload_i16_to_i64: 4398; GCN-HSA: ; %bb.0: 4399; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4400; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 4401; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 4402; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 4403; GCN-HSA-NEXT: flat_load_ushort v0, v[0:1] 4404; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0 4405; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1 4406; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 4407; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 4408; GCN-HSA-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 4409; GCN-HSA-NEXT: s_endpgm 4410; 4411; GCN-NOHSA-VI-LABEL: constant_zextload_i16_to_i64: 4412; GCN-NOHSA-VI: ; %bb.0: 4413; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 4414; GCN-NOHSA-VI-NEXT: 
s_mov_b32 s7, 0xf000 4415; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 4416; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 4417; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 4418; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4419; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 4420; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 4421; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 4422; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 4423; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 4424; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0 4425; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 4426; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v0 4427; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 4428; GCN-NOHSA-VI-NEXT: s_endpgm 4429; 4430; EG-LABEL: constant_zextload_i16_to_i64: 4431; EG: ; %bb.0: 4432; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 4433; EG-NEXT: TEX 0 @6 4434; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] 4435; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 4436; EG-NEXT: CF_END 4437; EG-NEXT: PAD 4438; EG-NEXT: Fetch clause starting at 6: 4439; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 4440; EG-NEXT: ALU clause starting at 8: 4441; EG-NEXT: MOV * T0.X, KC0[2].Z, 4442; EG-NEXT: ALU clause starting at 9: 4443; EG-NEXT: MOV * T0.Y, 0.0, 4444; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 4445; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4446 %a = load i16, i16 addrspace(4)* %in 4447 %ext = zext i16 %a to i64 4448 store i64 %ext, i64 addrspace(1)* %out 4449 ret void 4450} 4451 4452; FIXME: Need to optimize this sequence to avoid extra bfe: 4453; t28: i32,ch = load<LD2[%in(addrspace=1)], anyext from i16> t12, t27, undef:i64 4454; t31: i64 = any_extend t28 4455; t33: i64 = sign_extend_inreg t31, ValueType:ch:i16 4456; TODO: These could be expanded earlier using ASHR 15 4457define amdgpu_kernel void @constant_sextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(4)* %in) #0 { 4458; GCN-NOHSA-SI-LABEL: constant_sextload_i16_to_i64: 4459; GCN-NOHSA-SI: ; %bb.0: 4460; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 4461; 
GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 4462; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 4463; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 4464; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 4465; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 4466; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 4467; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 4468; GCN-NOHSA-SI-NEXT: buffer_load_sshort v0, off, s[8:11], 0 4469; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 4470; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 4471; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 4472; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 4473; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 4474; GCN-NOHSA-SI-NEXT: s_endpgm 4475; 4476; GCN-HSA-LABEL: constant_sextload_i16_to_i64: 4477; GCN-HSA: ; %bb.0: 4478; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4479; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 4480; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 4481; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 4482; GCN-HSA-NEXT: flat_load_sshort v0, v[0:1] 4483; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0 4484; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1 4485; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 4486; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 4487; GCN-HSA-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 4488; GCN-HSA-NEXT: s_endpgm 4489; 4490; GCN-NOHSA-VI-LABEL: constant_sextload_i16_to_i64: 4491; GCN-NOHSA-VI: ; %bb.0: 4492; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 4493; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 4494; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 4495; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 4496; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 4497; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4498; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 4499; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 4500; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 4501; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 4502; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 4503; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 4504; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v0, 0, 16 4505; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 4506; GCN-NOHSA-VI-NEXT: 
buffer_store_dwordx2 v[0:1], off, s[4:7], 0 4507; GCN-NOHSA-VI-NEXT: s_endpgm 4508; 4509; EG-LABEL: constant_sextload_i16_to_i64: 4510; EG: ; %bb.0: 4511; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 4512; EG-NEXT: TEX 0 @6 4513; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[] 4514; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 4515; EG-NEXT: CF_END 4516; EG-NEXT: PAD 4517; EG-NEXT: Fetch clause starting at 6: 4518; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 4519; EG-NEXT: ALU clause starting at 8: 4520; EG-NEXT: MOV * T0.X, KC0[2].Z, 4521; EG-NEXT: ALU clause starting at 9: 4522; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x, 4523; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 4524; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 4525; EG-NEXT: ASHR * T0.Y, PV.X, literal.x, 4526; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 4527 %a = load i16, i16 addrspace(4)* %in 4528 %ext = sext i16 %a to i64 4529 store i64 %ext, i64 addrspace(1)* %out 4530 ret void 4531} 4532 4533define amdgpu_kernel void @constant_zextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 { 4534; GCN-NOHSA-SI-LABEL: constant_zextload_v1i16_to_v1i64: 4535; GCN-NOHSA-SI: ; %bb.0: 4536; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 4537; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 4538; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 4539; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 4540; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 4541; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 4542; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 4543; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 4544; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 4545; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 4546; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 4547; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0 4548; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 4549; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 4550; GCN-NOHSA-SI-NEXT: s_endpgm 4551; 4552; GCN-HSA-LABEL: constant_zextload_v1i16_to_v1i64: 4553; GCN-HSA: ; %bb.0: 4554; GCN-HSA-NEXT: s_load_dwordx4 
s[0:3], s[4:5], 0x0 4555; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 4556; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 4557; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 4558; GCN-HSA-NEXT: flat_load_ushort v0, v[0:1] 4559; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0 4560; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1 4561; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 4562; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 4563; GCN-HSA-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 4564; GCN-HSA-NEXT: s_endpgm 4565; 4566; GCN-NOHSA-VI-LABEL: constant_zextload_v1i16_to_v1i64: 4567; GCN-NOHSA-VI: ; %bb.0: 4568; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 4569; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 4570; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 4571; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 4572; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 4573; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4574; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 4575; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 4576; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 4577; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 4578; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 4579; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0 4580; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 4581; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v0 4582; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 4583; GCN-NOHSA-VI-NEXT: s_endpgm 4584; 4585; EG-LABEL: constant_zextload_v1i16_to_v1i64: 4586; EG: ; %bb.0: 4587; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 4588; EG-NEXT: TEX 0 @6 4589; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] 4590; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 4591; EG-NEXT: CF_END 4592; EG-NEXT: PAD 4593; EG-NEXT: Fetch clause starting at 6: 4594; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 4595; EG-NEXT: ALU clause starting at 8: 4596; EG-NEXT: MOV * T0.X, KC0[2].Z, 4597; EG-NEXT: ALU clause starting at 9: 4598; EG-NEXT: MOV * T0.Y, 0.0, 4599; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 4600; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4601 %load = load <1 x i16>, <1 x i16> addrspace(4)* %in 4602 %ext = zext <1 x 
i16> %load to <1 x i64> 4603 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out 4604 ret void 4605} 4606 4607define amdgpu_kernel void @constant_sextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(4)* %in) #0 { 4608; GCN-NOHSA-SI-LABEL: constant_sextload_v1i16_to_v1i64: 4609; GCN-NOHSA-SI: ; %bb.0: 4610; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 4611; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 4612; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 4613; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 4614; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 4615; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 4616; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 4617; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 4618; GCN-NOHSA-SI-NEXT: buffer_load_sshort v0, off, s[8:11], 0 4619; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 4620; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 4621; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 4622; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 4623; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 4624; GCN-NOHSA-SI-NEXT: s_endpgm 4625; 4626; GCN-HSA-LABEL: constant_sextload_v1i16_to_v1i64: 4627; GCN-HSA: ; %bb.0: 4628; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4629; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 4630; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 4631; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 4632; GCN-HSA-NEXT: flat_load_sshort v0, v[0:1] 4633; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0 4634; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1 4635; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 4636; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 4637; GCN-HSA-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 4638; GCN-HSA-NEXT: s_endpgm 4639; 4640; GCN-NOHSA-VI-LABEL: constant_sextload_v1i16_to_v1i64: 4641; GCN-NOHSA-VI: ; %bb.0: 4642; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 4643; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 4644; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 4645; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 4646; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 4647; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4648; GCN-NOHSA-VI-NEXT: 
s_mov_b32 s8, s2 4649; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 4650; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 4651; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 4652; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 4653; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 4654; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v0, 0, 16 4655; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 4656; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 4657; GCN-NOHSA-VI-NEXT: s_endpgm 4658; 4659; EG-LABEL: constant_sextload_v1i16_to_v1i64: 4660; EG: ; %bb.0: 4661; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 4662; EG-NEXT: TEX 0 @6 4663; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[] 4664; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 4665; EG-NEXT: CF_END 4666; EG-NEXT: PAD 4667; EG-NEXT: Fetch clause starting at 6: 4668; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 4669; EG-NEXT: ALU clause starting at 8: 4670; EG-NEXT: MOV * T0.X, KC0[2].Z, 4671; EG-NEXT: ALU clause starting at 9: 4672; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x, 4673; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 4674; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 4675; EG-NEXT: ASHR * T0.Y, PV.X, literal.x, 4676; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 4677 %load = load <1 x i16>, <1 x i16> addrspace(4)* %in 4678 %ext = sext <1 x i16> %load to <1 x i64> 4679 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out 4680 ret void 4681} 4682 4683define amdgpu_kernel void @constant_zextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 { 4684; GCN-NOHSA-SI-LABEL: constant_zextload_v2i16_to_v2i64: 4685; GCN-NOHSA-SI: ; %bb.0: 4686; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 4687; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 4688; GCN-NOHSA-SI-NEXT: s_load_dword s2, s[2:3], 0x0 4689; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 4690; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0 4691; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 4692; GCN-NOHSA-SI-NEXT: s_lshr_b32 s4, s2, 16 4693; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s2, 0xffff 4694; 
GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 4695; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v1 4696; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5 4697; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s4 4698; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 4699; GCN-NOHSA-SI-NEXT: s_endpgm 4700; 4701; GCN-HSA-LABEL: constant_zextload_v2i16_to_v2i64: 4702; GCN-HSA: ; %bb.0: 4703; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4704; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 4705; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1 4706; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 4707; GCN-HSA-NEXT: s_load_dword s2, s[2:3], 0x0 4708; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 4709; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 4710; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 4711; GCN-HSA-NEXT: s_lshr_b32 s0, s2, 16 4712; GCN-HSA-NEXT: s_and_b32 s1, s2, 0xffff 4713; GCN-HSA-NEXT: v_mov_b32_e32 v0, s1 4714; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0 4715; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4716; GCN-HSA-NEXT: s_endpgm 4717; 4718; GCN-NOHSA-VI-LABEL: constant_zextload_v2i16_to_v2i64: 4719; GCN-NOHSA-VI: ; %bb.0: 4720; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 4721; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0 4722; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 4723; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 4724; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, v1 4725; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4726; GCN-NOHSA-VI-NEXT: s_load_dword s2, s[2:3], 0x0 4727; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 4728; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 4729; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4730; GCN-NOHSA-VI-NEXT: s_and_b32 s0, s2, 0xffff 4731; GCN-NOHSA-VI-NEXT: s_lshr_b32 s1, s2, 16 4732; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s0 4733; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s1 4734; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 4735; GCN-NOHSA-VI-NEXT: s_endpgm 4736; 4737; EG-LABEL: constant_zextload_v2i16_to_v2i64: 4738; EG: ; %bb.0: 4739; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 4740; EG-NEXT: TEX 0 @6 4741; EG-NEXT: ALU 6, @9, 
KC0[CB0:0-32], KC1[] 4742; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T5.X, 1 4743; EG-NEXT: CF_END 4744; EG-NEXT: PAD 4745; EG-NEXT: Fetch clause starting at 6: 4746; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 4747; EG-NEXT: ALU clause starting at 8: 4748; EG-NEXT: MOV * T4.X, KC0[2].Z, 4749; EG-NEXT: ALU clause starting at 9: 4750; EG-NEXT: LSHR * T4.Z, T4.X, literal.x, 4751; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4752; EG-NEXT: AND_INT T4.X, T4.X, literal.x, 4753; EG-NEXT: MOV T4.Y, 0.0, 4754; EG-NEXT: MOV T4.W, 0.0, 4755; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y, 4756; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 4757 %load = load <2 x i16>, <2 x i16> addrspace(4)* %in 4758 %ext = zext <2 x i16> %load to <2 x i64> 4759 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out 4760 ret void 4761} 4762 4763define amdgpu_kernel void @constant_sextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(4)* %in) #0 { 4764; GCN-NOHSA-SI-LABEL: constant_sextload_v2i16_to_v2i64: 4765; GCN-NOHSA-SI: ; %bb.0: 4766; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 4767; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 4768; GCN-NOHSA-SI-NEXT: s_load_dword s2, s[2:3], 0x0 4769; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 4770; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 4771; GCN-NOHSA-SI-NEXT: s_lshr_b32 s4, s2, 16 4772; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[6:7], s[2:3], 0x100000 4773; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000 4774; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 4775; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 4776; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s7 4777; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s4 4778; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s5 4779; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 4780; GCN-NOHSA-SI-NEXT: s_endpgm 4781; 4782; GCN-HSA-LABEL: constant_sextload_v2i16_to_v2i64: 4783; GCN-HSA: ; %bb.0: 4784; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4785; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 4786; GCN-HSA-NEXT: s_load_dword s2, 
s[2:3], 0x0 4787; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 4788; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 4789; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 4790; GCN-HSA-NEXT: s_lshr_b32 s0, s2, 16 4791; GCN-HSA-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x100000 4792; GCN-HSA-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x100000 4793; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 4794; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 4795; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0 4796; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1 4797; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4798; GCN-HSA-NEXT: s_endpgm 4799; 4800; GCN-NOHSA-VI-LABEL: constant_sextload_v2i16_to_v2i64: 4801; GCN-NOHSA-VI: ; %bb.0: 4802; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 4803; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4804; GCN-NOHSA-VI-NEXT: s_load_dword s4, s[2:3], 0x0 4805; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 4806; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 4807; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4808; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[6:7], s[4:5], 0x100000 4809; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s4, 16 4810; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000 4811; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 4812; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s7 4813; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4 4814; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s5 4815; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 4816; GCN-NOHSA-VI-NEXT: s_endpgm 4817; 4818; EG-LABEL: constant_sextload_v2i16_to_v2i64: 4819; EG: ; %bb.0: 4820; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 4821; EG-NEXT: TEX 0 @6 4822; EG-NEXT: ALU 8, @9, KC0[CB0:0-32], KC1[] 4823; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T5.X, 1 4824; EG-NEXT: CF_END 4825; EG-NEXT: PAD 4826; EG-NEXT: Fetch clause starting at 6: 4827; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 4828; EG-NEXT: ALU clause starting at 8: 4829; EG-NEXT: MOV * T4.X, KC0[2].Z, 4830; EG-NEXT: ALU clause starting at 9: 4831; EG-NEXT: ASHR * T4.W, T4.X, literal.x, 4832; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 4833; EG-NEXT: ASHR * 
T4.Z, T4.X, literal.x, 4834; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4835; EG-NEXT: BFE_INT T4.X, T4.X, 0.0, literal.x, 4836; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y, 4837; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 4838; EG-NEXT: ASHR * T4.Y, PV.X, literal.x, 4839; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 4840 %load = load <2 x i16>, <2 x i16> addrspace(4)* %in 4841 %ext = sext <2 x i16> %load to <2 x i64> 4842 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out 4843 ret void 4844} 4845 4846define amdgpu_kernel void @constant_zextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 { 4847; GCN-NOHSA-SI-LABEL: constant_zextload_v4i16_to_v4i64: 4848; GCN-NOHSA-SI: ; %bb.0: 4849; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 4850; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 4851; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 4852; GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 4853; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 4854; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0 4855; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, 0xffff 4856; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v1 4857; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 4858; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 4859; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 4860; GCN-NOHSA-SI-NEXT: s_lshr_b32 s0, s3, 16 4861; GCN-NOHSA-SI-NEXT: s_lshr_b32 s1, s2, 16 4862; GCN-NOHSA-SI-NEXT: s_and_b32 s3, s3, s8 4863; GCN-NOHSA-SI-NEXT: s_and_b32 s2, s2, s8 4864; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s3 4865; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s0 4866; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16 4867; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 4868; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s2 4869; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s1 4870; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 4871; GCN-NOHSA-SI-NEXT: s_endpgm 4872; 4873; GCN-HSA-LABEL: constant_zextload_v4i16_to_v4i64: 4874; GCN-HSA: ; %bb.0: 4875; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4876; GCN-HSA-NEXT: 
s_mov_b32 s4, 0xffff 4877; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 4878; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1 4879; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 4880; GCN-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 4881; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 4882; GCN-HSA-NEXT: s_lshr_b32 s5, s3, 16 4883; GCN-HSA-NEXT: s_lshr_b32 s6, s2, 16 4884; GCN-HSA-NEXT: s_and_b32 s7, s2, s4 4885; GCN-HSA-NEXT: s_and_b32 s2, s3, s4 4886; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 4887; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 4888; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4889; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 4890; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5 4891; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 4892; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4893; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 4894; GCN-HSA-NEXT: v_mov_b32_e32 v0, s7 4895; GCN-HSA-NEXT: v_mov_b32_e32 v2, s6 4896; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 4897; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4898; GCN-HSA-NEXT: s_endpgm 4899; 4900; GCN-NOHSA-VI-LABEL: constant_zextload_v4i16_to_v4i64: 4901; GCN-NOHSA-VI: ; %bb.0: 4902; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 4903; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, 0xffff 4904; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0 4905; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 4906; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 4907; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4908; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 4909; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 4910; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 4911; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, v1 4912; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4913; GCN-NOHSA-VI-NEXT: s_and_b32 s0, s2, s8 4914; GCN-NOHSA-VI-NEXT: s_lshr_b32 s1, s2, 16 4915; GCN-NOHSA-VI-NEXT: s_and_b32 s2, s3, s8 4916; GCN-NOHSA-VI-NEXT: s_lshr_b32 s3, s3, 16 4917; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s2 4918; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s3 4919; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16 4920; GCN-NOHSA-VI-NEXT: s_nop 0 4921; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 
v0, s0 4922; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s1 4923; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 4924; GCN-NOHSA-VI-NEXT: s_endpgm 4925; 4926; EG-LABEL: constant_zextload_v4i16_to_v4i64: 4927; EG: ; %bb.0: 4928; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 4929; EG-NEXT: TEX 0 @6 4930; EG-NEXT: ALU 18, @9, KC0[CB0:0-32], KC1[] 4931; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T8.X, 0 4932; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T7.X, 1 4933; EG-NEXT: CF_END 4934; EG-NEXT: Fetch clause starting at 6: 4935; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 4936; EG-NEXT: ALU clause starting at 8: 4937; EG-NEXT: MOV * T5.X, KC0[2].Z, 4938; EG-NEXT: ALU clause starting at 9: 4939; EG-NEXT: MOV T2.X, T5.X, 4940; EG-NEXT: MOV * T3.X, T5.Y, 4941; EG-NEXT: MOV T0.Y, PV.X, 4942; EG-NEXT: MOV * T0.Z, PS, 4943; EG-NEXT: LSHR * T5.Z, PV.Z, literal.x, 4944; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4945; EG-NEXT: AND_INT T5.X, T0.Z, literal.x, 4946; EG-NEXT: MOV T5.Y, 0.0, 4947; EG-NEXT: LSHR T6.Z, T0.Y, literal.y, 4948; EG-NEXT: AND_INT * T6.X, T0.Y, literal.x, 4949; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 4950; EG-NEXT: MOV T6.Y, 0.0, 4951; EG-NEXT: MOV T5.W, 0.0, 4952; EG-NEXT: MOV * T6.W, 0.0, 4953; EG-NEXT: LSHR T7.X, KC0[2].Y, literal.x, 4954; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4955; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4956; EG-NEXT: LSHR * T8.X, PV.W, literal.x, 4957; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4958 %load = load <4 x i16>, <4 x i16> addrspace(4)* %in 4959 %ext = zext <4 x i16> %load to <4 x i64> 4960 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out 4961 ret void 4962} 4963 4964define amdgpu_kernel void @constant_sextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(4)* %in) #0 { 4965; GCN-NOHSA-SI-LABEL: constant_sextload_v4i16_to_v4i64: 4966; GCN-NOHSA-SI: ; %bb.0: 4967; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 4968; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 4969; 
GCN-NOHSA-SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 4970; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 4971; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 4972; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 4973; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, s5 4974; GCN-NOHSA-SI-NEXT: s_lshr_b32 s8, s4, 16 4975; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[10:11], s[4:5], 0x100000 4976; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[4:5], s[4:5], 48 4977; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000 4978; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x100000 4979; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 4980; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s7 4981; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s4 4982; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s5 4983; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 4984; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 4985; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10 4986; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s11 4987; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s8 4988; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s9 4989; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 4990; GCN-NOHSA-SI-NEXT: s_endpgm 4991; 4992; GCN-HSA-LABEL: constant_sextload_v4i16_to_v4i64: 4993; GCN-HSA: ; %bb.0: 4994; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4995; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 4996; GCN-HSA-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 4997; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 4998; GCN-HSA-NEXT: s_mov_b32 s4, s3 4999; GCN-HSA-NEXT: s_lshr_b32 s6, s2, 16 5000; GCN-HSA-NEXT: s_bfe_i64 s[8:9], s[2:3], 0x100000 5001; GCN-HSA-NEXT: s_ashr_i64 s[2:3], s[2:3], 48 5002; GCN-HSA-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000 5003; GCN-HSA-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000 5004; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 5005; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 5006; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 5007; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5008; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5009; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 5010; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 5011; GCN-HSA-NEXT: 
v_mov_b32_e32 v4, s2 5012; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5013; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 5014; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 5015; GCN-HSA-NEXT: v_mov_b32_e32 v1, s9 5016; GCN-HSA-NEXT: v_mov_b32_e32 v2, s6 5017; GCN-HSA-NEXT: v_mov_b32_e32 v3, s7 5018; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 5019; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5020; GCN-HSA-NEXT: s_endpgm 5021; 5022; GCN-NOHSA-VI-LABEL: constant_sextload_v4i16_to_v4i64: 5023; GCN-NOHSA-VI: ; %bb.0: 5024; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 5025; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5026; GCN-NOHSA-VI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 5027; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 5028; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 5029; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5030; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s5 5031; GCN-NOHSA-VI-NEXT: s_lshr_b32 s10, s5, 16 5032; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[6:7], s[4:5], 0x100000 5033; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s4, 16 5034; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x100000 5035; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x100000 5036; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000 5037; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8 5038; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s9 5039; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s10 5040; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s11 5041; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 5042; GCN-NOHSA-VI-NEXT: s_nop 0 5043; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 5044; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s7 5045; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4 5046; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s5 5047; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 5048; GCN-NOHSA-VI-NEXT: s_endpgm 5049; 5050; EG-LABEL: constant_sextload_v4i16_to_v4i64: 5051; EG: ; %bb.0: 5052; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5053; EG-NEXT: TEX 0 @6 5054; EG-NEXT: ALU 20, @9, KC0[CB0:0-32], KC1[] 5055; EG-NEXT: MEM_RAT_CACHELESS 
STORE_RAW T7.XYZW, T8.X, 0 5056; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1 5057; EG-NEXT: CF_END 5058; EG-NEXT: Fetch clause starting at 6: 5059; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 5060; EG-NEXT: ALU clause starting at 8: 5061; EG-NEXT: MOV * T5.X, KC0[2].Z, 5062; EG-NEXT: ALU clause starting at 9: 5063; EG-NEXT: MOV T2.X, T5.X, 5064; EG-NEXT: MOV * T3.X, T5.Y, 5065; EG-NEXT: MOV T0.Y, PS, 5066; EG-NEXT: MOV * T0.Z, PV.X, 5067; EG-NEXT: ASHR * T5.W, PV.Z, literal.x, 5068; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 5069; EG-NEXT: LSHR T6.X, KC0[2].Y, literal.x, 5070; EG-NEXT: ASHR T5.Z, T0.Z, literal.y, 5071; EG-NEXT: ASHR * T7.W, T0.Y, literal.z, 5072; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5073; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 5074; EG-NEXT: BFE_INT T5.X, T0.Z, 0.0, literal.x, 5075; EG-NEXT: ASHR * T7.Z, T0.Y, literal.x, 5076; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5077; EG-NEXT: BFE_INT T7.X, T0.Y, 0.0, literal.x, 5078; EG-NEXT: ASHR T5.Y, PV.X, literal.y, 5079; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 5080; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 5081; EG-NEXT: LSHR T8.X, PV.W, literal.x, 5082; EG-NEXT: ASHR * T7.Y, PV.X, literal.y, 5083; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) 5084 %load = load <4 x i16>, <4 x i16> addrspace(4)* %in 5085 %ext = sext <4 x i16> %load to <4 x i64> 5086 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out 5087 ret void 5088} 5089 5090define amdgpu_kernel void @constant_zextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 { 5091; GCN-NOHSA-SI-LABEL: constant_zextload_v8i16_to_v8i64: 5092; GCN-NOHSA-SI: ; %bb.0: 5093; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 5094; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 5095; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5096; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[8:11], s[6:7], 0x0 5097; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 5098; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0 5099; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, 0xffff 
5100; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v1 5101; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4 5102; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5 5103; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5104; GCN-NOHSA-SI-NEXT: s_lshr_b32 s4, s9, 16 5105; GCN-NOHSA-SI-NEXT: s_lshr_b32 s5, s11, 16 5106; GCN-NOHSA-SI-NEXT: s_lshr_b32 s7, s10, 16 5107; GCN-NOHSA-SI-NEXT: s_lshr_b32 s12, s8, 16 5108; GCN-NOHSA-SI-NEXT: s_and_b32 s8, s8, s6 5109; GCN-NOHSA-SI-NEXT: s_and_b32 s10, s10, s6 5110; GCN-NOHSA-SI-NEXT: s_and_b32 s11, s11, s6 5111; GCN-NOHSA-SI-NEXT: s_and_b32 s6, s9, s6 5112; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s11 5113; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s5 5114; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 5115; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5116; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 5117; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s4 5118; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 5119; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5120; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10 5121; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s7 5122; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 5123; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5124; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s8 5125; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s12 5126; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 5127; GCN-NOHSA-SI-NEXT: s_endpgm 5128; 5129; GCN-HSA-LABEL: constant_zextload_v8i16_to_v8i64: 5130; GCN-HSA: ; %bb.0: 5131; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5132; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 5133; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1 5134; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5135; GCN-HSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 5136; GCN-HSA-NEXT: s_mov_b32 s2, 0xffff 5137; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5138; GCN-HSA-NEXT: s_lshr_b32 s8, s5, 16 5139; GCN-HSA-NEXT: s_lshr_b32 s3, s7, 16 5140; GCN-HSA-NEXT: s_lshr_b32 s9, s6, 16 5141; GCN-HSA-NEXT: s_lshr_b32 s10, s4, 16 5142; GCN-HSA-NEXT: s_and_b32 
s4, s4, s2 5143; GCN-HSA-NEXT: s_and_b32 s6, s6, s2 5144; GCN-HSA-NEXT: s_and_b32 s5, s5, s2 5145; GCN-HSA-NEXT: s_and_b32 s2, s7, s2 5146; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 5147; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 5148; GCN-HSA-NEXT: v_mov_b32_e32 v2, s3 5149; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5150; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5151; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5152; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 5153; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5154; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5155; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5156; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5157; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 5158; GCN-HSA-NEXT: v_mov_b32_e32 v0, s5 5159; GCN-HSA-NEXT: v_mov_b32_e32 v2, s8 5160; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5161; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5162; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5163; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 5164; GCN-HSA-NEXT: v_mov_b32_e32 v2, s9 5165; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5166; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5167; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 5168; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 5169; GCN-HSA-NEXT: v_mov_b32_e32 v2, s10 5170; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 5171; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5172; GCN-HSA-NEXT: s_endpgm 5173; 5174; GCN-NOHSA-VI-LABEL: constant_zextload_v8i16_to_v8i64: 5175; GCN-NOHSA-VI: ; %bb.0: 5176; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 5177; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0 5178; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 5179; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 5180; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, v1 5181; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5182; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[8:11], s[6:7], 0x0 5183; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, 0xffff 5184; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 5185; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 5186; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5187; GCN-NOHSA-VI-NEXT: s_and_b32 s4, s8, s6 5188; GCN-NOHSA-VI-NEXT: s_lshr_b32 s5, s8, 16 5189; 
GCN-NOHSA-VI-NEXT: s_and_b32 s7, s9, s6 5190; GCN-NOHSA-VI-NEXT: s_lshr_b32 s8, s9, 16 5191; GCN-NOHSA-VI-NEXT: s_and_b32 s9, s10, s6 5192; GCN-NOHSA-VI-NEXT: s_and_b32 s6, s11, s6 5193; GCN-NOHSA-VI-NEXT: s_lshr_b32 s11, s11, 16 5194; GCN-NOHSA-VI-NEXT: s_lshr_b32 s10, s10, 16 5195; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s6 5196; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s11 5197; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 5198; GCN-NOHSA-VI-NEXT: s_nop 0 5199; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s9 5200; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s10 5201; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 5202; GCN-NOHSA-VI-NEXT: s_nop 0 5203; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s7 5204; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s8 5205; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 5206; GCN-NOHSA-VI-NEXT: s_nop 0 5207; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s4 5208; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 5209; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 5210; GCN-NOHSA-VI-NEXT: s_endpgm 5211; 5212; EG-LABEL: constant_zextload_v8i16_to_v8i64: 5213; EG: ; %bb.0: 5214; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] 5215; EG-NEXT: TEX 0 @8 5216; EG-NEXT: ALU 30, @11, KC0[CB0:0-32], KC1[] 5217; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T14.X, 0 5218; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T13.X, 0 5219; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T12.X, 0 5220; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T11.X, 1 5221; EG-NEXT: CF_END 5222; EG-NEXT: Fetch clause starting at 8: 5223; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 5224; EG-NEXT: ALU clause starting at 10: 5225; EG-NEXT: MOV * T7.X, KC0[2].Z, 5226; EG-NEXT: ALU clause starting at 11: 5227; EG-NEXT: LSHR * T8.Z, T7.W, literal.x, 5228; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5229; EG-NEXT: AND_INT T8.X, T7.W, literal.x, 5230; EG-NEXT: MOV T8.Y, 0.0, 5231; EG-NEXT: LSHR T9.Z, T7.Z, literal.y, 5232; EG-NEXT: AND_INT 
* T9.X, T7.Z, literal.x, 5233; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 5234; EG-NEXT: MOV T9.Y, 0.0, 5235; EG-NEXT: LSHR * T10.Z, T7.Y, literal.x, 5236; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5237; EG-NEXT: AND_INT T10.X, T7.Y, literal.x, 5238; EG-NEXT: MOV T10.Y, 0.0, 5239; EG-NEXT: LSHR T7.Z, T7.X, literal.y, 5240; EG-NEXT: AND_INT * T7.X, T7.X, literal.x, 5241; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 5242; EG-NEXT: MOV T7.Y, 0.0, 5243; EG-NEXT: MOV T8.W, 0.0, 5244; EG-NEXT: MOV * T9.W, 0.0, 5245; EG-NEXT: MOV T10.W, 0.0, 5246; EG-NEXT: MOV * T7.W, 0.0, 5247; EG-NEXT: LSHR T11.X, KC0[2].Y, literal.x, 5248; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5249; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5250; EG-NEXT: LSHR T12.X, PV.W, literal.x, 5251; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5252; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 5253; EG-NEXT: LSHR T13.X, PV.W, literal.x, 5254; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5255; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 5256; EG-NEXT: LSHR * T14.X, PV.W, literal.x, 5257; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5258 %load = load <8 x i16>, <8 x i16> addrspace(4)* %in 5259 %ext = zext <8 x i16> %load to <8 x i64> 5260 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out 5261 ret void 5262} 5263; Sign-extension case for an eight-element vector. The kernel below loads <8 x i16> through the addrspace(4) pointer %in, widens every element to i64 with sext, and stores the <8 x i64> result through the addrspace(1) pointer %out. The assertion lines inside the function are autogenerated by utils/update_llc_test_checks.py (see the note at the top of this file), so regenerate them with that script instead of editing them by hand. 5264define amdgpu_kernel void @constant_sextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(4)* %in) #0 { 5265; GCN-NOHSA-SI-LABEL: constant_sextload_v8i16_to_v8i64: 5266; GCN-NOHSA-SI: ; %bb.0: 5267; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 5268; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5269; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 5270; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 5271; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 5272; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5273; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s7 5274; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s5 5275; GCN-NOHSA-SI-NEXT: s_lshr_b32 s12, s6, 16 5276; GCN-NOHSA-SI-NEXT: s_lshr_b32 s14, s4, 16
5277; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[16:17], s[4:5], 0x100000 5278; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[18:19], s[6:7], 0x100000 5279; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[4:5], s[4:5], 48 5280; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[6:7], s[6:7], 48 5281; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x100000 5282; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x100000 5283; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x100000 5284; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x100000 5285; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s8 5286; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s9 5287; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s6 5288; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s7 5289; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 5290; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5291; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10 5292; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s11 5293; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s4 5294; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s5 5295; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 5296; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5297; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s18 5298; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s19 5299; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s16 5300; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s17 5301; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s12 5302; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s13 5303; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 5304; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s14 5305; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s15 5306; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 5307; GCN-NOHSA-SI-NEXT: s_endpgm 5308; 5309; GCN-HSA-LABEL: constant_sextload_v8i16_to_v8i64: 5310; GCN-HSA: ; %bb.0: 5311; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5312; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5313; GCN-HSA-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 5314; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5315; GCN-HSA-NEXT: s_mov_b32 s2, s7 5316; GCN-HSA-NEXT: 
s_mov_b32 s8, s5 5317; GCN-HSA-NEXT: s_lshr_b32 s10, s6, 16 5318; GCN-HSA-NEXT: s_lshr_b32 s12, s4, 16 5319; GCN-HSA-NEXT: s_bfe_i64 s[14:15], s[4:5], 0x100000 5320; GCN-HSA-NEXT: s_bfe_i64 s[16:17], s[6:7], 0x100000 5321; GCN-HSA-NEXT: s_ashr_i64 s[4:5], s[4:5], 48 5322; GCN-HSA-NEXT: s_ashr_i64 s[6:7], s[6:7], 48 5323; GCN-HSA-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x100000 5324; GCN-HSA-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x100000 5325; GCN-HSA-NEXT: s_bfe_i64 s[8:9], s[8:9], 0x100000 5326; GCN-HSA-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x100000 5327; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 5328; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 5329; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 5330; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5331; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5332; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5333; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 5334; GCN-HSA-NEXT: v_mov_b32_e32 v2, s6 5335; GCN-HSA-NEXT: v_mov_b32_e32 v3, s7 5336; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5337; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5338; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5339; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5340; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 5341; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 5342; GCN-HSA-NEXT: v_mov_b32_e32 v1, s9 5343; GCN-HSA-NEXT: v_mov_b32_e32 v2, s4 5344; GCN-HSA-NEXT: v_mov_b32_e32 v3, s5 5345; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5346; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5347; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5348; GCN-HSA-NEXT: v_mov_b32_e32 v0, s16 5349; GCN-HSA-NEXT: v_mov_b32_e32 v1, s17 5350; GCN-HSA-NEXT: v_mov_b32_e32 v2, s10 5351; GCN-HSA-NEXT: v_mov_b32_e32 v3, s11 5352; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5353; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5354; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 5355; GCN-HSA-NEXT: v_mov_b32_e32 v0, s14 5356; GCN-HSA-NEXT: v_mov_b32_e32 v1, s15 5357; GCN-HSA-NEXT: v_mov_b32_e32 v2, s12 5358; GCN-HSA-NEXT: v_mov_b32_e32 v3, s13 5359; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 5360; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 
5361; GCN-HSA-NEXT: s_endpgm 5362; 5363; GCN-NOHSA-VI-LABEL: constant_sextload_v8i16_to_v8i64: 5364; GCN-NOHSA-VI: ; %bb.0: 5365; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 5366; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5367; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 5368; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 5369; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 5370; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5371; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[14:15], s[6:7], 0x100000 5372; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s6, 16 5373; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[16:17], s[6:7], 0x100000 5374; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s7 5375; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[18:19], s[6:7], 0x100000 5376; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s7, 16 5377; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000 5378; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s5 5379; GCN-NOHSA-VI-NEXT: s_lshr_b32 s12, s5, 16 5380; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s18 5381; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s19 5382; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s6 5383; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s7 5384; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[8:9], s[4:5], 0x100000 5385; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s4, 16 5386; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[10:11], s[10:11], 0x100000 5387; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x100000 5388; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 5389; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000 5390; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s14 5391; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s15 5392; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s16 5393; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s17 5394; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 5395; GCN-NOHSA-VI-NEXT: s_nop 0 5396; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s10 5397; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s11 5398; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s12 5399; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s13 5400; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], 
off, s[0:3], 0 offset:16 5401; GCN-NOHSA-VI-NEXT: s_nop 0 5402; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8 5403; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s9 5404; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4 5405; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s5 5406; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 5407; GCN-NOHSA-VI-NEXT: s_endpgm 5408; 5409; EG-LABEL: constant_sextload_v8i16_to_v8i64: 5410; EG: ; %bb.0: 5411; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] 5412; EG-NEXT: TEX 0 @8 5413; EG-NEXT: ALU 33, @11, KC0[CB0:0-32], KC1[] 5414; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T7.X, 0 5415; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T11.X, 0 5416; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T9.X, 0 5417; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T8.X, 1 5418; EG-NEXT: CF_END 5419; EG-NEXT: Fetch clause starting at 8: 5420; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 5421; EG-NEXT: ALU clause starting at 10: 5422; EG-NEXT: MOV * T7.X, KC0[2].Z, 5423; EG-NEXT: ALU clause starting at 11: 5424; EG-NEXT: LSHR T8.X, KC0[2].Y, literal.x, 5425; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5426; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5427; EG-NEXT: LSHR T9.X, PV.W, literal.x, 5428; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.y, 5429; EG-NEXT: ASHR * T10.W, T7.X, literal.z, 5430; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 5431; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 5432; EG-NEXT: LSHR T11.X, PV.W, literal.x, 5433; EG-NEXT: ASHR T10.Z, T7.X, literal.y, 5434; EG-NEXT: ASHR * T12.W, T7.Y, literal.z, 5435; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5436; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 5437; EG-NEXT: BFE_INT T10.X, T7.X, 0.0, literal.x, 5438; EG-NEXT: ASHR T12.Z, T7.Y, literal.x, 5439; EG-NEXT: ASHR * T13.W, T7.Z, literal.y, 5440; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 5441; EG-NEXT: BFE_INT T12.X, T7.Y, 0.0, literal.x, 5442; EG-NEXT: ASHR T10.Y, PV.X, literal.y, 5443; EG-NEXT: ASHR T13.Z, T7.Z, literal.x, 5444; EG-NEXT: ASHR * T14.W, 
T7.W, literal.y, 5445; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 5446; EG-NEXT: BFE_INT T13.X, T7.Z, 0.0, literal.x, 5447; EG-NEXT: ASHR T12.Y, PV.X, literal.y, 5448; EG-NEXT: ASHR * T14.Z, T7.W, literal.x, 5449; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 5450; EG-NEXT: BFE_INT T14.X, T7.W, 0.0, literal.x, 5451; EG-NEXT: ASHR T13.Y, PV.X, literal.y, 5452; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 5453; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 5454; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 5455; EG-NEXT: LSHR T7.X, PV.W, literal.x, 5456; EG-NEXT: ASHR * T14.Y, PV.X, literal.y, 5457; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) 5458 %load = load <8 x i16>, <8 x i16> addrspace(4)* %in 5459 %ext = sext <8 x i16> %load to <8 x i64> 5460 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out 5461 ret void 5462} 5463; Zero-extension case for a sixteen-element vector. The kernel below loads <16 x i16> through the addrspace(4) pointer %in, widens every element to i64 with zext, and stores the <16 x i64> result through the addrspace(1) pointer %out. The assertion lines inside the function are autogenerated by utils/update_llc_test_checks.py (see the note at the top of this file), so regenerate them with that script instead of editing them by hand. 5464define amdgpu_kernel void @constant_zextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 { 5465; GCN-NOHSA-SI-LABEL: constant_zextload_v16i16_to_v16i64: 5466; GCN-NOHSA-SI: ; %bb.0: 5467; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x9 5468; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 5469; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5470; GCN-NOHSA-SI-NEXT: s_load_dwordx8 s[4:11], s[14:15], 0x0 5471; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 5472; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0 5473; GCN-NOHSA-SI-NEXT: s_mov_b32 s14, 0xffff 5474; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v1 5475; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s12 5476; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s13 5477; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5478; GCN-NOHSA-SI-NEXT: s_lshr_b32 s12, s5, 16 5479; GCN-NOHSA-SI-NEXT: s_lshr_b32 s13, s7, 16 5480; GCN-NOHSA-SI-NEXT: s_lshr_b32 s15, s11, 16 5481; GCN-NOHSA-SI-NEXT: s_lshr_b32 s16, s9, 16 5482; GCN-NOHSA-SI-NEXT: s_lshr_b32 s17, s8, 16 5483; GCN-NOHSA-SI-NEXT: s_lshr_b32 s18, s10, 16 5484; GCN-NOHSA-SI-NEXT: s_lshr_b32 s19, s6, 16 5485; GCN-NOHSA-SI-NEXT: s_lshr_b32 s20, s4, 16 5486; GCN-NOHSA-SI-NEXT:
s_and_b32 s4, s4, s14 5487; GCN-NOHSA-SI-NEXT: s_and_b32 s6, s6, s14 5488; GCN-NOHSA-SI-NEXT: s_and_b32 s10, s10, s14 5489; GCN-NOHSA-SI-NEXT: s_and_b32 s8, s8, s14 5490; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, s14 5491; GCN-NOHSA-SI-NEXT: s_and_b32 s7, s7, s14 5492; GCN-NOHSA-SI-NEXT: s_and_b32 s9, s9, s14 5493; GCN-NOHSA-SI-NEXT: s_and_b32 s11, s11, s14 5494; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s9 5495; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s16 5496; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 5497; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5498; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s11 5499; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s15 5500; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 5501; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5502; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s7 5503; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s13 5504; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 5505; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5506; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5 5507; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s12 5508; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 5509; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5510; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s8 5511; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s17 5512; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 5513; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5514; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s10 5515; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s18 5516; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 5517; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5518; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s6 5519; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s19 5520; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 5521; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5522; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s4 5523; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s20 5524; GCN-NOHSA-SI-NEXT: 
buffer_store_dwordx4 v[0:3], off, s[0:3], 0 5525; GCN-NOHSA-SI-NEXT: s_endpgm 5526; 5527; GCN-HSA-LABEL: constant_zextload_v16i16_to_v16i64: 5528; GCN-HSA: ; %bb.0: 5529; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5530; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 5531; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1 5532; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5533; GCN-HSA-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 5534; GCN-HSA-NEXT: s_mov_b32 s2, 0xffff 5535; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5536; GCN-HSA-NEXT: s_lshr_b32 s12, s5, 16 5537; GCN-HSA-NEXT: s_lshr_b32 s13, s7, 16 5538; GCN-HSA-NEXT: s_lshr_b32 s14, s11, 16 5539; GCN-HSA-NEXT: s_lshr_b32 s3, s9, 16 5540; GCN-HSA-NEXT: s_lshr_b32 s15, s8, 16 5541; GCN-HSA-NEXT: s_lshr_b32 s16, s10, 16 5542; GCN-HSA-NEXT: s_lshr_b32 s17, s6, 16 5543; GCN-HSA-NEXT: s_lshr_b32 s18, s4, 16 5544; GCN-HSA-NEXT: s_and_b32 s4, s4, s2 5545; GCN-HSA-NEXT: s_and_b32 s6, s6, s2 5546; GCN-HSA-NEXT: s_and_b32 s10, s10, s2 5547; GCN-HSA-NEXT: s_and_b32 s8, s8, s2 5548; GCN-HSA-NEXT: s_and_b32 s5, s5, s2 5549; GCN-HSA-NEXT: s_and_b32 s7, s7, s2 5550; GCN-HSA-NEXT: s_and_b32 s11, s11, s2 5551; GCN-HSA-NEXT: s_and_b32 s2, s9, s2 5552; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 5553; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x50 5554; GCN-HSA-NEXT: v_mov_b32_e32 v2, s3 5555; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5556; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5557; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5558; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70 5559; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5560; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5561; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5562; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5563; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 5564; GCN-HSA-NEXT: v_mov_b32_e32 v0, s11 5565; GCN-HSA-NEXT: v_mov_b32_e32 v2, s14 5566; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5567; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5568; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5569; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5570; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 5571; GCN-HSA-NEXT: 
v_mov_b32_e32 v0, s7 5572; GCN-HSA-NEXT: v_mov_b32_e32 v2, s13 5573; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5574; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5575; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5576; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5577; GCN-HSA-NEXT: s_add_u32 s2, s0, 64 5578; GCN-HSA-NEXT: v_mov_b32_e32 v0, s5 5579; GCN-HSA-NEXT: v_mov_b32_e32 v2, s12 5580; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5581; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5582; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5583; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5584; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60 5585; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 5586; GCN-HSA-NEXT: v_mov_b32_e32 v2, s15 5587; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5588; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5589; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5590; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5591; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 5592; GCN-HSA-NEXT: v_mov_b32_e32 v0, s10 5593; GCN-HSA-NEXT: v_mov_b32_e32 v2, s16 5594; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5595; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5596; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 5597; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 5598; GCN-HSA-NEXT: v_mov_b32_e32 v2, s17 5599; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 5600; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5601; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 5602; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 5603; GCN-HSA-NEXT: v_mov_b32_e32 v2, s18 5604; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 5605; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5606; GCN-HSA-NEXT: s_endpgm 5607; 5608; GCN-NOHSA-VI-LABEL: constant_zextload_v16i16_to_v16i64: 5609; GCN-NOHSA-VI: ; %bb.0: 5610; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x24 5611; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0 5612; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 5613; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 5614; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, v1 5615; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5616; GCN-NOHSA-VI-NEXT: s_load_dwordx8 s[4:11], s[14:15], 0x0 5617; 
GCN-NOHSA-VI-NEXT: s_mov_b32 s14, 0xffff 5618; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s12 5619; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s13 5620; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5621; GCN-NOHSA-VI-NEXT: s_and_b32 s12, s4, s14 5622; GCN-NOHSA-VI-NEXT: s_and_b32 s13, s5, s14 5623; GCN-NOHSA-VI-NEXT: s_and_b32 s15, s6, s14 5624; GCN-NOHSA-VI-NEXT: s_and_b32 s16, s7, s14 5625; GCN-NOHSA-VI-NEXT: s_and_b32 s17, s8, s14 5626; GCN-NOHSA-VI-NEXT: s_and_b32 s18, s9, s14 5627; GCN-NOHSA-VI-NEXT: s_and_b32 s19, s10, s14 5628; GCN-NOHSA-VI-NEXT: s_and_b32 s14, s11, s14 5629; GCN-NOHSA-VI-NEXT: s_lshr_b32 s11, s11, 16 5630; GCN-NOHSA-VI-NEXT: s_lshr_b32 s10, s10, 16 5631; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s14 5632; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s11 5633; GCN-NOHSA-VI-NEXT: s_lshr_b32 s9, s9, 16 5634; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 5635; GCN-NOHSA-VI-NEXT: s_lshr_b32 s8, s8, 16 5636; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s19 5637; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s10 5638; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 5639; GCN-NOHSA-VI-NEXT: s_lshr_b32 s7, s7, 16 5640; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s18 5641; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s9 5642; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 5643; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s6, 16 5644; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s17 5645; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s8 5646; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 5647; GCN-NOHSA-VI-NEXT: s_lshr_b32 s5, s5, 16 5648; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s16 5649; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7 5650; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 5651; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s4, 16 5652; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s15 5653; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s6 5654; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 5655; GCN-NOHSA-VI-NEXT: 
s_nop 0 5656; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s13 5657; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 5658; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 5659; GCN-NOHSA-VI-NEXT: s_nop 0 5660; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s12 5661; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4 5662; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 5663; GCN-NOHSA-VI-NEXT: s_endpgm 5664; 5665; EG-LABEL: constant_zextload_v16i16_to_v16i64: 5666; EG: ; %bb.0: 5667; EG-NEXT: ALU 0, @16, KC0[CB0:0-32], KC1[] 5668; EG-NEXT: TEX 1 @12 5669; EG-NEXT: ALU 62, @17, KC0[CB0:0-32], KC1[] 5670; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T26.X, 0 5671; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T25.X, 0 5672; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T24.X, 0 5673; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T23.X, 0 5674; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T22.X, 0 5675; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T21.X, 0 5676; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T20.X, 0 5677; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T19.X, 1 5678; EG-NEXT: CF_END 5679; EG-NEXT: Fetch clause starting at 12: 5680; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1 5681; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1 5682; EG-NEXT: ALU clause starting at 16: 5683; EG-NEXT: MOV * T11.X, KC0[2].Z, 5684; EG-NEXT: ALU clause starting at 17: 5685; EG-NEXT: LSHR * T13.Z, T12.W, literal.x, 5686; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5687; EG-NEXT: AND_INT T13.X, T12.W, literal.x, 5688; EG-NEXT: MOV T13.Y, 0.0, 5689; EG-NEXT: LSHR T14.Z, T12.Z, literal.y, 5690; EG-NEXT: AND_INT * T14.X, T12.Z, literal.x, 5691; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 5692; EG-NEXT: MOV T14.Y, 0.0, 5693; EG-NEXT: LSHR * T15.Z, T12.Y, literal.x, 5694; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5695; EG-NEXT: AND_INT T15.X, T12.Y, literal.x, 5696; EG-NEXT: MOV T15.Y, 0.0, 5697; EG-NEXT: LSHR T12.Z, T12.X, literal.y, 5698; EG-NEXT: AND_INT * 
T12.X, T12.X, literal.x, 5699; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 5700; EG-NEXT: MOV T12.Y, 0.0, 5701; EG-NEXT: LSHR * T16.Z, T11.W, literal.x, 5702; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5703; EG-NEXT: AND_INT T16.X, T11.W, literal.x, 5704; EG-NEXT: MOV T16.Y, 0.0, 5705; EG-NEXT: LSHR T17.Z, T11.Z, literal.y, 5706; EG-NEXT: AND_INT * T17.X, T11.Z, literal.x, 5707; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 5708; EG-NEXT: MOV T17.Y, 0.0, 5709; EG-NEXT: LSHR * T18.Z, T11.Y, literal.x, 5710; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5711; EG-NEXT: AND_INT T18.X, T11.Y, literal.x, 5712; EG-NEXT: MOV T18.Y, 0.0, 5713; EG-NEXT: LSHR T11.Z, T11.X, literal.y, 5714; EG-NEXT: AND_INT * T11.X, T11.X, literal.x, 5715; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 5716; EG-NEXT: MOV T11.Y, 0.0, 5717; EG-NEXT: MOV T13.W, 0.0, 5718; EG-NEXT: MOV * T14.W, 0.0, 5719; EG-NEXT: MOV T15.W, 0.0, 5720; EG-NEXT: MOV * T12.W, 0.0, 5721; EG-NEXT: MOV T16.W, 0.0, 5722; EG-NEXT: MOV * T17.W, 0.0, 5723; EG-NEXT: MOV T18.W, 0.0, 5724; EG-NEXT: MOV * T11.W, 0.0, 5725; EG-NEXT: LSHR T19.X, KC0[2].Y, literal.x, 5726; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5727; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5728; EG-NEXT: LSHR T20.X, PV.W, literal.x, 5729; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5730; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 5731; EG-NEXT: LSHR T21.X, PV.W, literal.x, 5732; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5733; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 5734; EG-NEXT: LSHR T22.X, PV.W, literal.x, 5735; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5736; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 5737; EG-NEXT: LSHR T23.X, PV.W, literal.x, 5738; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5739; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 5740; EG-NEXT: LSHR T24.X, PV.W, literal.x, 5741; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5742; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 5743; EG-NEXT: LSHR T25.X, PV.W, literal.x, 5744; 
EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5745; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) 5746; EG-NEXT: LSHR * T26.X, PV.W, literal.x, 5747; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5748 %load = load <16 x i16>, <16 x i16> addrspace(4)* %in 5749 %ext = zext <16 x i16> %load to <16 x i64> 5750 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out 5751 ret void 5752} 5753 5754define amdgpu_kernel void @constant_sextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(4)* %in) #0 { 5755; GCN-NOHSA-SI-LABEL: constant_sextload_v16i16_to_v16i64: 5756; GCN-NOHSA-SI: ; %bb.0: 5757; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 5758; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5759; GCN-NOHSA-SI-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0 5760; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 5761; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 5762; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5763; GCN-NOHSA-SI-NEXT: s_mov_b32 s12, s11 5764; GCN-NOHSA-SI-NEXT: s_mov_b32 s14, s9 5765; GCN-NOHSA-SI-NEXT: s_mov_b32 s16, s7 5766; GCN-NOHSA-SI-NEXT: s_mov_b32 s18, s5 5767; GCN-NOHSA-SI-NEXT: s_lshr_b32 s20, s10, 16 5768; GCN-NOHSA-SI-NEXT: s_lshr_b32 s22, s8, 16 5769; GCN-NOHSA-SI-NEXT: s_lshr_b32 s24, s6, 16 5770; GCN-NOHSA-SI-NEXT: s_lshr_b32 s26, s4, 16 5771; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[28:29], s[4:5], 0x100000 5772; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[30:31], s[6:7], 0x100000 5773; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[34:35], s[8:9], 0x100000 5774; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[36:37], s[10:11], 0x100000 5775; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[4:5], s[4:5], 48 5776; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[6:7], s[6:7], 48 5777; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[8:9], s[8:9], 48 5778; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[10:11], s[10:11], 48 5779; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x100000 5780; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x100000 5781; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x100000 5782; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[12:13], s[12:13], 0x100000 
5783; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[26:27], s[26:27], 0x100000 5784; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x100000 5785; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x100000 5786; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x100000 5787; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s12 5788; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s13 5789; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s10 5790; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s11 5791; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 5792; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5793; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s14 5794; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s15 5795; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s8 5796; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s9 5797; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 5798; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5799; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s16 5800; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s17 5801; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s6 5802; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s7 5803; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 5804; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5805; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s18 5806; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s19 5807; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s4 5808; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s5 5809; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 5810; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 5811; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s36 5812; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s37 5813; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s34 5814; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s35 5815; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s30 5816; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s31 5817; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s28 5818; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s29 5819; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s20 5820; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s21 5821; GCN-NOHSA-SI-NEXT: 
buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 5822; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s22 5823; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s23 5824; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:64 5825; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s24 5826; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s25 5827; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32 5828; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s26 5829; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s27 5830; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 5831; GCN-NOHSA-SI-NEXT: s_endpgm 5832; 5833; GCN-HSA-LABEL: constant_sextload_v16i16_to_v16i64: 5834; GCN-HSA: ; %bb.0: 5835; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5836; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5837; GCN-HSA-NEXT: s_load_dwordx8 s[8:15], s[2:3], 0x0 5838; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5839; GCN-HSA-NEXT: s_mov_b32 s6, s15 5840; GCN-HSA-NEXT: s_mov_b32 s16, s13 5841; GCN-HSA-NEXT: s_mov_b32 s18, s11 5842; GCN-HSA-NEXT: s_mov_b32 s20, s9 5843; GCN-HSA-NEXT: s_lshr_b32 s22, s14, 16 5844; GCN-HSA-NEXT: s_lshr_b32 s24, s12, 16 5845; GCN-HSA-NEXT: s_lshr_b32 s26, s10, 16 5846; GCN-HSA-NEXT: s_lshr_b32 s28, s8, 16 5847; GCN-HSA-NEXT: s_bfe_i64 s[34:35], s[14:15], 0x100000 5848; GCN-HSA-NEXT: s_ashr_i64 s[14:15], s[14:15], 48 5849; GCN-HSA-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000 5850; GCN-HSA-NEXT: s_bfe_i64 s[2:3], s[8:9], 0x100000 5851; GCN-HSA-NEXT: s_bfe_i64 s[4:5], s[10:11], 0x100000 5852; GCN-HSA-NEXT: s_bfe_i64 s[30:31], s[12:13], 0x100000 5853; GCN-HSA-NEXT: s_ashr_i64 s[8:9], s[8:9], 48 5854; GCN-HSA-NEXT: s_ashr_i64 s[10:11], s[10:11], 48 5855; GCN-HSA-NEXT: s_ashr_i64 s[12:13], s[12:13], 48 5856; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 5857; GCN-HSA-NEXT: v_mov_b32_e32 v1, s7 5858; GCN-HSA-NEXT: v_mov_b32_e32 v2, s14 5859; GCN-HSA-NEXT: v_mov_b32_e32 v3, s15 5860; GCN-HSA-NEXT: s_bfe_i64 s[6:7], s[28:29], 0x100000 5861; GCN-HSA-NEXT: s_bfe_i64 s[14:15], s[26:27], 0x100000 5862; 
GCN-HSA-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x100000 5863; GCN-HSA-NEXT: s_bfe_i64 s[22:23], s[22:23], 0x100000 5864; GCN-HSA-NEXT: s_bfe_i64 s[20:21], s[20:21], 0x100000 5865; GCN-HSA-NEXT: s_bfe_i64 s[18:19], s[18:19], 0x100000 5866; GCN-HSA-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x100000 5867; GCN-HSA-NEXT: s_add_u32 s26, s0, 0x70 5868; GCN-HSA-NEXT: s_addc_u32 s27, s1, 0 5869; GCN-HSA-NEXT: v_mov_b32_e32 v8, s26 5870; GCN-HSA-NEXT: v_mov_b32_e32 v6, s12 5871; GCN-HSA-NEXT: s_add_u32 s12, s0, 0x50 5872; GCN-HSA-NEXT: v_mov_b32_e32 v9, s27 5873; GCN-HSA-NEXT: v_mov_b32_e32 v7, s13 5874; GCN-HSA-NEXT: s_addc_u32 s13, s1, 0 5875; GCN-HSA-NEXT: v_mov_b32_e32 v10, s12 5876; GCN-HSA-NEXT: v_mov_b32_e32 v4, s16 5877; GCN-HSA-NEXT: v_mov_b32_e32 v5, s17 5878; GCN-HSA-NEXT: v_mov_b32_e32 v11, s13 5879; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3] 5880; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[4:7] 5881; GCN-HSA-NEXT: v_mov_b32_e32 v2, s10 5882; GCN-HSA-NEXT: s_add_u32 s10, s0, 48 5883; GCN-HSA-NEXT: v_mov_b32_e32 v3, s11 5884; GCN-HSA-NEXT: s_addc_u32 s11, s1, 0 5885; GCN-HSA-NEXT: v_mov_b32_e32 v4, s10 5886; GCN-HSA-NEXT: v_mov_b32_e32 v0, s18 5887; GCN-HSA-NEXT: v_mov_b32_e32 v1, s19 5888; GCN-HSA-NEXT: v_mov_b32_e32 v5, s11 5889; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5890; GCN-HSA-NEXT: s_nop 0 5891; GCN-HSA-NEXT: v_mov_b32_e32 v2, s8 5892; GCN-HSA-NEXT: s_add_u32 s8, s0, 16 5893; GCN-HSA-NEXT: v_mov_b32_e32 v3, s9 5894; GCN-HSA-NEXT: s_addc_u32 s9, s1, 0 5895; GCN-HSA-NEXT: v_mov_b32_e32 v4, s8 5896; GCN-HSA-NEXT: v_mov_b32_e32 v0, s20 5897; GCN-HSA-NEXT: v_mov_b32_e32 v1, s21 5898; GCN-HSA-NEXT: v_mov_b32_e32 v5, s9 5899; GCN-HSA-NEXT: s_add_u32 s8, s0, 0x60 5900; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5901; GCN-HSA-NEXT: s_addc_u32 s9, s1, 0 5902; GCN-HSA-NEXT: v_mov_b32_e32 v4, s8 5903; GCN-HSA-NEXT: v_mov_b32_e32 v0, s34 5904; GCN-HSA-NEXT: v_mov_b32_e32 v1, s35 5905; GCN-HSA-NEXT: v_mov_b32_e32 v2, s22 5906; GCN-HSA-NEXT: v_mov_b32_e32 v3, 
s23 5907; GCN-HSA-NEXT: v_mov_b32_e32 v5, s9 5908; GCN-HSA-NEXT: s_add_u32 s8, s0, 64 5909; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5910; GCN-HSA-NEXT: s_addc_u32 s9, s1, 0 5911; GCN-HSA-NEXT: v_mov_b32_e32 v4, s8 5912; GCN-HSA-NEXT: v_mov_b32_e32 v0, s30 5913; GCN-HSA-NEXT: v_mov_b32_e32 v1, s31 5914; GCN-HSA-NEXT: v_mov_b32_e32 v2, s24 5915; GCN-HSA-NEXT: v_mov_b32_e32 v3, s25 5916; GCN-HSA-NEXT: v_mov_b32_e32 v5, s9 5917; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5918; GCN-HSA-NEXT: s_nop 0 5919; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 5920; GCN-HSA-NEXT: s_add_u32 s4, s0, 32 5921; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 5922; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 5923; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 5924; GCN-HSA-NEXT: v_mov_b32_e32 v2, s14 5925; GCN-HSA-NEXT: v_mov_b32_e32 v3, s15 5926; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 5927; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5928; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 5929; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 5930; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 5931; GCN-HSA-NEXT: v_mov_b32_e32 v2, s6 5932; GCN-HSA-NEXT: v_mov_b32_e32 v3, s7 5933; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 5934; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5935; GCN-HSA-NEXT: s_endpgm 5936; 5937; GCN-NOHSA-VI-LABEL: constant_sextload_v16i16_to_v16i64: 5938; GCN-NOHSA-VI: ; %bb.0: 5939; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x24 5940; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5941; GCN-NOHSA-VI-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0 5942; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, 0xf000 5943; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, -1 5944; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5945; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[30:31], s[6:7], 0x100000 5946; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s6, 16 5947; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[34:35], s[6:7], 0x100000 5948; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s7 5949; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[24:25], s[4:5], 0x100000 5950; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s4, 16 5951; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[36:37], 
s[6:7], 0x100000 5952; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s7, 16 5953; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[26:27], s[4:5], 0x100000 5954; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s5 5955; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[6:7], s[6:7], 0x100000 5956; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[18:19], s[2:3], 0x100000 5957; GCN-NOHSA-VI-NEXT: s_lshr_b32 s2, s2, 16 5958; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[28:29], s[4:5], 0x100000 5959; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s5, 16 5960; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s36 5961; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s37 5962; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s6 5963; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s7 5964; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[20:21], s[2:3], 0x100000 5965; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, s3 5966; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x100000 5967; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:112 5968; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[22:23], s[2:3], 0x100000 5969; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s30 5970; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s31 5971; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s34 5972; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s35 5973; GCN-NOHSA-VI-NEXT: s_lshr_b32 s2, s3, 16 5974; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:96 5975; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x100000 5976; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s28 5977; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s29 5978; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4 5979; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s5 5980; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:80 5981; GCN-NOHSA-VI-NEXT: s_mov_b32 s14, s1 5982; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s24 5983; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s25 5984; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s26 5985; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s27 5986; GCN-NOHSA-VI-NEXT: s_lshr_b32 s16, s1, 16 5987; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:64 5988; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[12:13], s[0:1], 
0x100000 5989; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s22 5990; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s23 5991; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s2 5992; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s3 5993; GCN-NOHSA-VI-NEXT: s_lshr_b32 s0, s0, 16 5994; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[14:15], s[14:15], 0x100000 5995; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[16:17], s[16:17], 0x100000 5996; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:48 5997; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x100000 5998; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s18 5999; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s19 6000; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s20 6001; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s21 6002; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:32 6003; GCN-NOHSA-VI-NEXT: s_nop 0 6004; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s14 6005; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s15 6006; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s16 6007; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s17 6008; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 offset:16 6009; GCN-NOHSA-VI-NEXT: s_nop 0 6010; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s12 6011; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s13 6012; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s0 6013; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s1 6014; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 6015; GCN-NOHSA-VI-NEXT: s_endpgm 6016; 6017; EG-LABEL: constant_sextload_v16i16_to_v16i64: 6018; EG: ; %bb.0: 6019; EG-NEXT: ALU 0, @16, KC0[CB0:0-32], KC1[] 6020; EG-NEXT: TEX 1 @12 6021; EG-NEXT: ALU 65, @17, KC0[CB0:0-32], KC1[] 6022; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T12.X, 0 6023; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T20.X, 0 6024; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T18.X, 0 6025; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T17.X, 0 6026; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T16.X, 0 6027; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T15.X, 0 6028; EG-NEXT: 
MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T14.X, 0 6029; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T13.X, 1 6030; EG-NEXT: CF_END 6031; EG-NEXT: Fetch clause starting at 12: 6032; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1 6033; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1 6034; EG-NEXT: ALU clause starting at 16: 6035; EG-NEXT: MOV * T11.X, KC0[2].Z, 6036; EG-NEXT: ALU clause starting at 17: 6037; EG-NEXT: LSHR T13.X, KC0[2].Y, literal.x, 6038; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6039; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6040; EG-NEXT: LSHR T14.X, PV.W, literal.x, 6041; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6042; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 6043; EG-NEXT: LSHR T15.X, PV.W, literal.x, 6044; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6045; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 6046; EG-NEXT: LSHR T16.X, PV.W, literal.x, 6047; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6048; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 6049; EG-NEXT: LSHR T17.X, PV.W, literal.x, 6050; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6051; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 6052; EG-NEXT: LSHR T18.X, PV.W, literal.x, 6053; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.y, 6054; EG-NEXT: ASHR * T19.W, T11.X, literal.z, 6055; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 6056; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 6057; EG-NEXT: LSHR T20.X, PV.W, literal.x, 6058; EG-NEXT: ASHR T19.Z, T11.X, literal.y, 6059; EG-NEXT: ASHR * T21.W, T11.Y, literal.z, 6060; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6061; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 6062; EG-NEXT: BFE_INT T19.X, T11.X, 0.0, literal.x, 6063; EG-NEXT: ASHR T21.Z, T11.Y, literal.x, 6064; EG-NEXT: ASHR * T22.W, T11.Z, literal.y, 6065; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6066; EG-NEXT: BFE_INT T21.X, T11.Y, 0.0, literal.x, 6067; EG-NEXT: ASHR T19.Y, PV.X, literal.y, 6068; EG-NEXT: ASHR T22.Z, T11.Z, literal.x, 6069; EG-NEXT: ASHR * T23.W, T11.W, literal.y, 6070; 
EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6071; EG-NEXT: BFE_INT T22.X, T11.Z, 0.0, literal.x, 6072; EG-NEXT: ASHR T21.Y, PV.X, literal.y, 6073; EG-NEXT: ASHR T23.Z, T11.W, literal.x, 6074; EG-NEXT: ASHR * T24.W, T12.X, literal.y, 6075; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6076; EG-NEXT: BFE_INT T23.X, T11.W, 0.0, literal.x, 6077; EG-NEXT: ASHR T22.Y, PV.X, literal.y, 6078; EG-NEXT: ASHR T24.Z, T12.X, literal.x, 6079; EG-NEXT: ASHR * T11.W, T12.Y, literal.y, 6080; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6081; EG-NEXT: BFE_INT T24.X, T12.X, 0.0, literal.x, 6082; EG-NEXT: ASHR T23.Y, PV.X, literal.y, 6083; EG-NEXT: ASHR T11.Z, T12.Y, literal.x, 6084; EG-NEXT: ASHR * T25.W, T12.Z, literal.y, 6085; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6086; EG-NEXT: BFE_INT T11.X, T12.Y, 0.0, literal.x, 6087; EG-NEXT: ASHR T24.Y, PV.X, literal.y, 6088; EG-NEXT: ASHR T25.Z, T12.Z, literal.x, 6089; EG-NEXT: ASHR * T26.W, T12.W, literal.y, 6090; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6091; EG-NEXT: BFE_INT T25.X, T12.Z, 0.0, literal.x, 6092; EG-NEXT: ASHR T11.Y, PV.X, literal.y, 6093; EG-NEXT: ASHR * T26.Z, T12.W, literal.x, 6094; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6095; EG-NEXT: BFE_INT T26.X, T12.W, 0.0, literal.x, 6096; EG-NEXT: ASHR T25.Y, PV.X, literal.y, 6097; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 6098; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6099; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00) 6100; EG-NEXT: LSHR T12.X, PV.W, literal.x, 6101; EG-NEXT: ASHR * T26.Y, PV.X, literal.y, 6102; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) 6103 %load = load <16 x i16>, <16 x i16> addrspace(4)* %in 6104 %ext = sext <16 x i16> %load to <16 x i64> 6105 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out 6106 ret void 6107} 6108 6109define amdgpu_kernel void @constant_zextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 { 6110; GCN-NOHSA-SI-LABEL: constant_zextload_v32i16_to_v32i64: 6111; GCN-NOHSA-SI: ; %bb.0: 
6112; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[16:19], s[0:1], 0x9 6113; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 6114; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 6115; GCN-NOHSA-SI-NEXT: s_mov_b32 s18, 0xffff 6116; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 6117; GCN-NOHSA-SI-NEXT: s_lshr_b32 s19, s1, 16 6118; GCN-NOHSA-SI-NEXT: s_lshr_b32 s20, s3, 16 6119; GCN-NOHSA-SI-NEXT: s_lshr_b32 s21, s5, 16 6120; GCN-NOHSA-SI-NEXT: s_lshr_b32 s22, s7, 16 6121; GCN-NOHSA-SI-NEXT: s_lshr_b32 s23, s9, 16 6122; GCN-NOHSA-SI-NEXT: s_lshr_b32 s24, s11, 16 6123; GCN-NOHSA-SI-NEXT: s_lshr_b32 s25, s13, 16 6124; GCN-NOHSA-SI-NEXT: s_lshr_b32 s26, s15, 16 6125; GCN-NOHSA-SI-NEXT: s_and_b32 s27, s0, s18 6126; GCN-NOHSA-SI-NEXT: s_and_b32 s28, s2, s18 6127; GCN-NOHSA-SI-NEXT: s_and_b32 s29, s4, s18 6128; GCN-NOHSA-SI-NEXT: s_and_b32 s30, s6, s18 6129; GCN-NOHSA-SI-NEXT: s_and_b32 s31, s8, s18 6130; GCN-NOHSA-SI-NEXT: s_and_b32 s33, s10, s18 6131; GCN-NOHSA-SI-NEXT: s_and_b32 s34, s12, s18 6132; GCN-NOHSA-SI-NEXT: s_and_b32 s35, s14, s18 6133; GCN-NOHSA-SI-NEXT: s_and_b32 s36, s1, s18 6134; GCN-NOHSA-SI-NEXT: s_and_b32 s37, s3, s18 6135; GCN-NOHSA-SI-NEXT: s_and_b32 s5, s5, s18 6136; GCN-NOHSA-SI-NEXT: s_and_b32 s7, s7, s18 6137; GCN-NOHSA-SI-NEXT: s_and_b32 s9, s9, s18 6138; GCN-NOHSA-SI-NEXT: s_and_b32 s11, s11, s18 6139; GCN-NOHSA-SI-NEXT: s_and_b32 s13, s13, s18 6140; GCN-NOHSA-SI-NEXT: s_and_b32 s15, s15, s18 6141; GCN-NOHSA-SI-NEXT: s_lshr_b32 s14, s14, 16 6142; GCN-NOHSA-SI-NEXT: s_lshr_b32 s12, s12, 16 6143; GCN-NOHSA-SI-NEXT: s_lshr_b32 s10, s10, 16 6144; GCN-NOHSA-SI-NEXT: s_lshr_b32 s8, s8, 16 6145; GCN-NOHSA-SI-NEXT: s_lshr_b32 s6, s6, 16 6146; GCN-NOHSA-SI-NEXT: s_lshr_b32 s4, s4, 16 6147; GCN-NOHSA-SI-NEXT: s_lshr_b32 s18, s2, 16 6148; GCN-NOHSA-SI-NEXT: s_lshr_b32 s38, s0, 16 6149; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 6150; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0 6151; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 6152; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v1 6153; 
GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s16 6154; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s17 6155; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s15 6156; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s26 6157; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240 6158; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6159; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s13 6160; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s25 6161; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208 6162; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6163; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s11 6164; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s24 6165; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176 6166; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6167; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s9 6168; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s23 6169; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144 6170; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6171; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s7 6172; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s22 6173; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 6174; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6175; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s5 6176; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s21 6177; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 6178; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6179; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s37 6180; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s20 6181; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 6182; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6183; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s36 6184; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s19 6185; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 6186; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6187; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s35 6188; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s14 6189; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224 
6190; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6191; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s34 6192; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s12 6193; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192 6194; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6195; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s33 6196; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s10 6197; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160 6198; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6199; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s31 6200; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s8 6201; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128 6202; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6203; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s30 6204; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s6 6205; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 6206; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6207; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s29 6208; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s4 6209; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 6210; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6211; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s28 6212; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s18 6213; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 6214; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6215; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s27 6216; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s38 6217; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 6218; GCN-NOHSA-SI-NEXT: s_endpgm 6219; 6220; GCN-HSA-LABEL: constant_zextload_v32i16_to_v32i64: 6221; GCN-HSA: ; %bb.0: 6222; GCN-HSA-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x0 6223; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 6224; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1 6225; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 6226; GCN-HSA-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 6227; GCN-HSA-NEXT: s_mov_b32 s18, 0xffff 6228; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 6229; GCN-HSA-NEXT: s_and_b32 s19, s0, 
s18 6230; GCN-HSA-NEXT: s_and_b32 s20, s2, s18 6231; GCN-HSA-NEXT: s_and_b32 s21, s4, s18 6232; GCN-HSA-NEXT: s_and_b32 s22, s6, s18 6233; GCN-HSA-NEXT: s_and_b32 s23, s8, s18 6234; GCN-HSA-NEXT: s_and_b32 s24, s10, s18 6235; GCN-HSA-NEXT: s_and_b32 s25, s12, s18 6236; GCN-HSA-NEXT: s_and_b32 s26, s14, s18 6237; GCN-HSA-NEXT: s_and_b32 s27, s1, s18 6238; GCN-HSA-NEXT: s_and_b32 s28, s3, s18 6239; GCN-HSA-NEXT: s_and_b32 s29, s5, s18 6240; GCN-HSA-NEXT: s_and_b32 s30, s7, s18 6241; GCN-HSA-NEXT: s_and_b32 s31, s9, s18 6242; GCN-HSA-NEXT: s_and_b32 s33, s11, s18 6243; GCN-HSA-NEXT: s_and_b32 s34, s13, s18 6244; GCN-HSA-NEXT: s_and_b32 s18, s15, s18 6245; GCN-HSA-NEXT: s_lshr_b32 s35, s1, 16 6246; GCN-HSA-NEXT: s_lshr_b32 s3, s3, 16 6247; GCN-HSA-NEXT: s_lshr_b32 s5, s5, 16 6248; GCN-HSA-NEXT: s_lshr_b32 s7, s7, 16 6249; GCN-HSA-NEXT: s_lshr_b32 s9, s9, 16 6250; GCN-HSA-NEXT: s_lshr_b32 s11, s11, 16 6251; GCN-HSA-NEXT: s_lshr_b32 s13, s13, 16 6252; GCN-HSA-NEXT: s_lshr_b32 s15, s15, 16 6253; GCN-HSA-NEXT: s_lshr_b32 s14, s14, 16 6254; GCN-HSA-NEXT: s_lshr_b32 s12, s12, 16 6255; GCN-HSA-NEXT: s_lshr_b32 s10, s10, 16 6256; GCN-HSA-NEXT: s_lshr_b32 s8, s8, 16 6257; GCN-HSA-NEXT: s_lshr_b32 s6, s6, 16 6258; GCN-HSA-NEXT: s_lshr_b32 s4, s4, 16 6259; GCN-HSA-NEXT: s_lshr_b32 s2, s2, 16 6260; GCN-HSA-NEXT: s_lshr_b32 s36, s0, 16 6261; GCN-HSA-NEXT: s_add_u32 s0, s16, 0xf0 6262; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 6263; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 6264; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 6265; GCN-HSA-NEXT: s_add_u32 s0, s16, 0xd0 6266; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 6267; GCN-HSA-NEXT: v_mov_b32_e32 v7, s1 6268; GCN-HSA-NEXT: v_mov_b32_e32 v6, s0 6269; GCN-HSA-NEXT: s_add_u32 s0, s16, 0xb0 6270; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 6271; GCN-HSA-NEXT: v_mov_b32_e32 v9, s1 6272; GCN-HSA-NEXT: v_mov_b32_e32 v8, s0 6273; GCN-HSA-NEXT: s_add_u32 s0, s16, 0x90 6274; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 6275; GCN-HSA-NEXT: v_mov_b32_e32 v11, s1 6276; GCN-HSA-NEXT: 
v_mov_b32_e32 v10, s0 6277; GCN-HSA-NEXT: v_mov_b32_e32 v0, s18 6278; GCN-HSA-NEXT: v_mov_b32_e32 v2, s15 6279; GCN-HSA-NEXT: s_add_u32 s0, s16, 0x70 6280; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6281; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 6282; GCN-HSA-NEXT: v_mov_b32_e32 v0, s34 6283; GCN-HSA-NEXT: v_mov_b32_e32 v2, s13 6284; GCN-HSA-NEXT: flat_store_dwordx4 v[6:7], v[0:3] 6285; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 6286; GCN-HSA-NEXT: v_mov_b32_e32 v0, s33 6287; GCN-HSA-NEXT: v_mov_b32_e32 v2, s11 6288; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3] 6289; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 6290; GCN-HSA-NEXT: v_mov_b32_e32 v0, s31 6291; GCN-HSA-NEXT: v_mov_b32_e32 v2, s9 6292; GCN-HSA-NEXT: s_add_u32 s0, s16, 0x50 6293; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[0:3] 6294; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 6295; GCN-HSA-NEXT: v_mov_b32_e32 v0, s30 6296; GCN-HSA-NEXT: v_mov_b32_e32 v2, s7 6297; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6298; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 6299; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 6300; GCN-HSA-NEXT: s_add_u32 s0, s16, 48 6301; GCN-HSA-NEXT: v_mov_b32_e32 v0, s29 6302; GCN-HSA-NEXT: v_mov_b32_e32 v2, s5 6303; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 6304; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6305; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 6306; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 6307; GCN-HSA-NEXT: s_add_u32 s0, s16, 16 6308; GCN-HSA-NEXT: v_mov_b32_e32 v0, s28 6309; GCN-HSA-NEXT: v_mov_b32_e32 v2, s3 6310; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 6311; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6312; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 6313; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 6314; GCN-HSA-NEXT: s_add_u32 s0, s16, 0xe0 6315; GCN-HSA-NEXT: v_mov_b32_e32 v0, s27 6316; GCN-HSA-NEXT: v_mov_b32_e32 v2, s35 6317; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 6318; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6319; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 6320; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 6321; GCN-HSA-NEXT: s_add_u32 s0, s16, 
0xc0 6322; GCN-HSA-NEXT: v_mov_b32_e32 v0, s26 6323; GCN-HSA-NEXT: v_mov_b32_e32 v2, s14 6324; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 6325; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6326; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 6327; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 6328; GCN-HSA-NEXT: s_add_u32 s0, s16, 0xa0 6329; GCN-HSA-NEXT: v_mov_b32_e32 v0, s25 6330; GCN-HSA-NEXT: v_mov_b32_e32 v2, s12 6331; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 6332; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6333; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 6334; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 6335; GCN-HSA-NEXT: s_add_u32 s0, s16, 0x80 6336; GCN-HSA-NEXT: v_mov_b32_e32 v0, s24 6337; GCN-HSA-NEXT: v_mov_b32_e32 v2, s10 6338; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 6339; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6340; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 6341; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 6342; GCN-HSA-NEXT: s_add_u32 s0, s16, 0x60 6343; GCN-HSA-NEXT: v_mov_b32_e32 v0, s23 6344; GCN-HSA-NEXT: v_mov_b32_e32 v2, s8 6345; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 6346; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6347; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 6348; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 6349; GCN-HSA-NEXT: s_add_u32 s0, s16, 64 6350; GCN-HSA-NEXT: v_mov_b32_e32 v0, s22 6351; GCN-HSA-NEXT: v_mov_b32_e32 v2, s6 6352; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 6353; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6354; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 6355; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 6356; GCN-HSA-NEXT: s_add_u32 s0, s16, 32 6357; GCN-HSA-NEXT: v_mov_b32_e32 v0, s21 6358; GCN-HSA-NEXT: v_mov_b32_e32 v2, s4 6359; GCN-HSA-NEXT: s_addc_u32 s1, s17, 0 6360; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6361; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 6362; GCN-HSA-NEXT: v_mov_b32_e32 v0, s20 6363; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 6364; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 6365; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6366; GCN-HSA-NEXT: v_mov_b32_e32 v4, s16 6367; GCN-HSA-NEXT: 
v_mov_b32_e32 v0, s19 6368; GCN-HSA-NEXT: v_mov_b32_e32 v2, s36 6369; GCN-HSA-NEXT: v_mov_b32_e32 v5, s17 6370; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6371; GCN-HSA-NEXT: s_endpgm 6372; 6373; GCN-NOHSA-VI-LABEL: constant_zextload_v32i16_to_v32i64: 6374; GCN-NOHSA-VI: ; %bb.0: 6375; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[20:23], s[0:1], 0x24 6376; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0 6377; GCN-NOHSA-VI-NEXT: s_mov_b32 s19, 0xf000 6378; GCN-NOHSA-VI-NEXT: s_mov_b32 s18, -1 6379; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, v1 6380; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 6381; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[0:15], s[22:23], 0x0 6382; GCN-NOHSA-VI-NEXT: s_mov_b32 s22, 0xffff 6383; GCN-NOHSA-VI-NEXT: s_mov_b32 s16, s20 6384; GCN-NOHSA-VI-NEXT: s_mov_b32 s17, s21 6385; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 6386; GCN-NOHSA-VI-NEXT: s_and_b32 s20, s0, s22 6387; GCN-NOHSA-VI-NEXT: s_and_b32 s21, s1, s22 6388; GCN-NOHSA-VI-NEXT: s_and_b32 s23, s2, s22 6389; GCN-NOHSA-VI-NEXT: s_and_b32 s24, s3, s22 6390; GCN-NOHSA-VI-NEXT: s_and_b32 s25, s4, s22 6391; GCN-NOHSA-VI-NEXT: s_and_b32 s26, s5, s22 6392; GCN-NOHSA-VI-NEXT: s_and_b32 s27, s6, s22 6393; GCN-NOHSA-VI-NEXT: s_and_b32 s28, s7, s22 6394; GCN-NOHSA-VI-NEXT: s_and_b32 s29, s8, s22 6395; GCN-NOHSA-VI-NEXT: s_and_b32 s30, s9, s22 6396; GCN-NOHSA-VI-NEXT: s_and_b32 s31, s10, s22 6397; GCN-NOHSA-VI-NEXT: s_and_b32 s33, s11, s22 6398; GCN-NOHSA-VI-NEXT: s_and_b32 s34, s12, s22 6399; GCN-NOHSA-VI-NEXT: s_and_b32 s35, s13, s22 6400; GCN-NOHSA-VI-NEXT: s_and_b32 s36, s14, s22 6401; GCN-NOHSA-VI-NEXT: s_and_b32 s22, s15, s22 6402; GCN-NOHSA-VI-NEXT: s_lshr_b32 s15, s15, 16 6403; GCN-NOHSA-VI-NEXT: s_lshr_b32 s14, s14, 16 6404; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s22 6405; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s15 6406; GCN-NOHSA-VI-NEXT: s_lshr_b32 s13, s13, 16 6407; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:240 6408; GCN-NOHSA-VI-NEXT: s_lshr_b32 s12, s12, 16 6409; GCN-NOHSA-VI-NEXT: 
v_mov_b32_e32 v0, s36 6410; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s14 6411; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:224 6412; GCN-NOHSA-VI-NEXT: s_lshr_b32 s11, s11, 16 6413; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s35 6414; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s13 6415; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:208 6416; GCN-NOHSA-VI-NEXT: s_lshr_b32 s10, s10, 16 6417; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s34 6418; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s12 6419; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:192 6420; GCN-NOHSA-VI-NEXT: s_lshr_b32 s9, s9, 16 6421; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s33 6422; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s11 6423; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:176 6424; GCN-NOHSA-VI-NEXT: s_lshr_b32 s8, s8, 16 6425; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s31 6426; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s10 6427; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:160 6428; GCN-NOHSA-VI-NEXT: s_lshr_b32 s7, s7, 16 6429; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s30 6430; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s9 6431; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:144 6432; GCN-NOHSA-VI-NEXT: s_lshr_b32 s6, s6, 16 6433; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s29 6434; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s8 6435; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:128 6436; GCN-NOHSA-VI-NEXT: s_lshr_b32 s5, s5, 16 6437; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s28 6438; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s7 6439; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:112 6440; GCN-NOHSA-VI-NEXT: s_lshr_b32 s4, s4, 16 6441; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s27 6442; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s6 6443; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:96 6444; GCN-NOHSA-VI-NEXT: s_lshr_b32 s3, s3, 16 6445; 
GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s26 6446; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s5 6447; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:80 6448; GCN-NOHSA-VI-NEXT: s_lshr_b32 s2, s2, 16 6449; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s25 6450; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4 6451; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:64 6452; GCN-NOHSA-VI-NEXT: s_lshr_b32 s1, s1, 16 6453; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s24 6454; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s3 6455; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:48 6456; GCN-NOHSA-VI-NEXT: s_lshr_b32 s0, s0, 16 6457; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s23 6458; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s2 6459; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:32 6460; GCN-NOHSA-VI-NEXT: s_nop 0 6461; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s21 6462; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s1 6463; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:16 6464; GCN-NOHSA-VI-NEXT: s_nop 0 6465; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s20 6466; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s0 6467; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 6468; GCN-NOHSA-VI-NEXT: s_endpgm 6469; 6470; EG-LABEL: constant_zextload_v32i16_to_v32i64: 6471; EG: ; %bb.0: 6472; EG-NEXT: ALU 0, @30, KC0[CB0:0-32], KC1[] 6473; EG-NEXT: TEX 2 @22 6474; EG-NEXT: ALU 33, @31, KC0[], KC1[] 6475; EG-NEXT: TEX 0 @28 6476; EG-NEXT: ALU 92, @65, KC0[CB0:0-32], KC1[] 6477; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T50.X, 0 6478; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T49.X, 0 6479; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T48.X, 0 6480; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T47.X, 0 6481; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T46.X, 0 6482; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T45.X, 0 6483; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T44.X, 0 6484; EG-NEXT: 
MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T43.X, 0 6485; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T42.X, 0 6486; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T41.X, 0 6487; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T40.X, 0 6488; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T39.X, 0 6489; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T38.X, 0 6490; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T37.X, 0 6491; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T34.XYZW, T36.X, 0 6492; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T35.X, 1 6493; EG-NEXT: CF_END 6494; EG-NEXT: Fetch clause starting at 22: 6495; EG-NEXT: VTX_READ_128 T20.XYZW, T19.X, 48, #1 6496; EG-NEXT: VTX_READ_128 T21.XYZW, T19.X, 16, #1 6497; EG-NEXT: VTX_READ_128 T22.XYZW, T19.X, 32, #1 6498; EG-NEXT: Fetch clause starting at 28: 6499; EG-NEXT: VTX_READ_128 T29.XYZW, T19.X, 0, #1 6500; EG-NEXT: ALU clause starting at 30: 6501; EG-NEXT: MOV * T19.X, KC0[2].Z, 6502; EG-NEXT: ALU clause starting at 31: 6503; EG-NEXT: LSHR * T23.Z, T20.W, literal.x, 6504; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6505; EG-NEXT: AND_INT T23.X, T20.W, literal.x, 6506; EG-NEXT: MOV T23.Y, 0.0, 6507; EG-NEXT: LSHR T24.Z, T20.Z, literal.y, 6508; EG-NEXT: AND_INT * T24.X, T20.Z, literal.x, 6509; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6510; EG-NEXT: MOV T24.Y, 0.0, 6511; EG-NEXT: LSHR * T25.Z, T20.Y, literal.x, 6512; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6513; EG-NEXT: AND_INT T25.X, T20.Y, literal.x, 6514; EG-NEXT: MOV T25.Y, 0.0, 6515; EG-NEXT: LSHR T20.Z, T20.X, literal.y, 6516; EG-NEXT: AND_INT * T20.X, T20.X, literal.x, 6517; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6518; EG-NEXT: MOV T20.Y, 0.0, 6519; EG-NEXT: LSHR * T26.Z, T22.W, literal.x, 6520; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6521; EG-NEXT: AND_INT T26.X, T22.W, literal.x, 6522; EG-NEXT: MOV T26.Y, 0.0, 6523; EG-NEXT: LSHR T27.Z, T22.Z, literal.y, 6524; EG-NEXT: AND_INT * T27.X, T22.Z, literal.x, 6525; EG-NEXT: 65535(9.183409e-41), 
16(2.242078e-44) 6526; EG-NEXT: MOV T27.Y, 0.0, 6527; EG-NEXT: LSHR * T28.Z, T22.Y, literal.x, 6528; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6529; EG-NEXT: AND_INT T28.X, T22.Y, literal.x, 6530; EG-NEXT: MOV T28.Y, 0.0, 6531; EG-NEXT: LSHR T22.Z, T22.X, literal.y, 6532; EG-NEXT: AND_INT * T22.X, T22.X, literal.x, 6533; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6534; EG-NEXT: MOV T22.Y, 0.0, 6535; EG-NEXT: LSHR * T19.Z, T21.W, literal.x, 6536; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6537; EG-NEXT: ALU clause starting at 65: 6538; EG-NEXT: AND_INT T19.X, T21.W, literal.x, 6539; EG-NEXT: MOV T19.Y, 0.0, 6540; EG-NEXT: LSHR T30.Z, T21.Z, literal.y, 6541; EG-NEXT: AND_INT * T30.X, T21.Z, literal.x, 6542; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6543; EG-NEXT: MOV T30.Y, 0.0, 6544; EG-NEXT: LSHR * T31.Z, T21.Y, literal.x, 6545; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6546; EG-NEXT: AND_INT T31.X, T21.Y, literal.x, 6547; EG-NEXT: MOV T31.Y, 0.0, 6548; EG-NEXT: LSHR T21.Z, T21.X, literal.y, 6549; EG-NEXT: AND_INT * T21.X, T21.X, literal.x, 6550; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6551; EG-NEXT: MOV T21.Y, 0.0, 6552; EG-NEXT: LSHR * T32.Z, T29.W, literal.x, 6553; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6554; EG-NEXT: AND_INT T32.X, T29.W, literal.x, 6555; EG-NEXT: MOV T32.Y, 0.0, 6556; EG-NEXT: LSHR T33.Z, T29.Z, literal.y, 6557; EG-NEXT: AND_INT * T33.X, T29.Z, literal.x, 6558; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6559; EG-NEXT: MOV T33.Y, 0.0, 6560; EG-NEXT: LSHR * T34.Z, T29.Y, literal.x, 6561; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6562; EG-NEXT: AND_INT T34.X, T29.Y, literal.x, 6563; EG-NEXT: MOV T34.Y, 0.0, 6564; EG-NEXT: LSHR T29.Z, T29.X, literal.y, 6565; EG-NEXT: AND_INT * T29.X, T29.X, literal.x, 6566; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6567; EG-NEXT: MOV T29.Y, 0.0, 6568; EG-NEXT: MOV T23.W, 0.0, 6569; EG-NEXT: MOV * T24.W, 0.0, 6570; EG-NEXT: MOV T25.W, 0.0, 6571; EG-NEXT: MOV * T20.W, 0.0, 6572; 
EG-NEXT: MOV T26.W, 0.0, 6573; EG-NEXT: MOV * T27.W, 0.0, 6574; EG-NEXT: MOV T28.W, 0.0, 6575; EG-NEXT: MOV * T22.W, 0.0, 6576; EG-NEXT: MOV T19.W, 0.0, 6577; EG-NEXT: MOV * T30.W, 0.0, 6578; EG-NEXT: MOV T31.W, 0.0, 6579; EG-NEXT: MOV * T21.W, 0.0, 6580; EG-NEXT: MOV T32.W, 0.0, 6581; EG-NEXT: MOV * T33.W, 0.0, 6582; EG-NEXT: MOV T34.W, 0.0, 6583; EG-NEXT: MOV * T29.W, 0.0, 6584; EG-NEXT: LSHR T35.X, KC0[2].Y, literal.x, 6585; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6586; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6587; EG-NEXT: LSHR T36.X, PV.W, literal.x, 6588; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6589; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 6590; EG-NEXT: LSHR T37.X, PV.W, literal.x, 6591; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6592; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 6593; EG-NEXT: LSHR T38.X, PV.W, literal.x, 6594; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6595; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 6596; EG-NEXT: LSHR T39.X, PV.W, literal.x, 6597; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6598; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 6599; EG-NEXT: LSHR T40.X, PV.W, literal.x, 6600; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6601; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 6602; EG-NEXT: LSHR T41.X, PV.W, literal.x, 6603; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6604; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) 6605; EG-NEXT: LSHR T42.X, PV.W, literal.x, 6606; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6607; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43) 6608; EG-NEXT: LSHR T43.X, PV.W, literal.x, 6609; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6610; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43) 6611; EG-NEXT: LSHR T44.X, PV.W, literal.x, 6612; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6613; EG-NEXT: 2(2.802597e-45), 160(2.242078e-43) 6614; EG-NEXT: LSHR T45.X, PV.W, literal.x, 6615; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6616; EG-NEXT: 2(2.802597e-45), 176(2.466285e-43) 6617; EG-NEXT: 
LSHR T46.X, PV.W, literal.x, 6618; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6619; EG-NEXT: 2(2.802597e-45), 192(2.690493e-43) 6620; EG-NEXT: LSHR T47.X, PV.W, literal.x, 6621; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6622; EG-NEXT: 2(2.802597e-45), 208(2.914701e-43) 6623; EG-NEXT: LSHR T48.X, PV.W, literal.x, 6624; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6625; EG-NEXT: 2(2.802597e-45), 224(3.138909e-43) 6626; EG-NEXT: LSHR T49.X, PV.W, literal.x, 6627; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6628; EG-NEXT: 2(2.802597e-45), 240(3.363116e-43) 6629; EG-NEXT: LSHR * T50.X, PV.W, literal.x, 6630; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 6631 %load = load <32 x i16>, <32 x i16> addrspace(4)* %in 6632 %ext = zext <32 x i16> %load to <32 x i64> 6633 store <32 x i64> %ext, <32 x i64> addrspace(1)* %out 6634 ret void 6635} 6636 6637define amdgpu_kernel void @constant_sextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(4)* %in) #0 { 6638; GCN-NOHSA-SI-LABEL: constant_sextload_v32i16_to_v32i64: 6639; GCN-NOHSA-SI: ; %bb.0: 6640; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[16:19], s[0:1], 0x9 6641; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 6642; GCN-NOHSA-SI-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 6643; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 6644; GCN-NOHSA-SI-NEXT: s_mov_b32 s18, s15 6645; GCN-NOHSA-SI-NEXT: s_mov_b32 s20, s13 6646; GCN-NOHSA-SI-NEXT: s_mov_b32 s36, s11 6647; GCN-NOHSA-SI-NEXT: s_mov_b32 s40, s9 6648; GCN-NOHSA-SI-NEXT: s_mov_b32 s44, s7 6649; GCN-NOHSA-SI-NEXT: s_mov_b32 s46, s5 6650; GCN-NOHSA-SI-NEXT: s_mov_b32 s38, s3 6651; GCN-NOHSA-SI-NEXT: s_mov_b32 s42, s1 6652; GCN-NOHSA-SI-NEXT: s_lshr_b32 s24, s14, 16 6653; GCN-NOHSA-SI-NEXT: s_lshr_b32 s26, s12, 16 6654; GCN-NOHSA-SI-NEXT: s_lshr_b32 s28, s10, 16 6655; GCN-NOHSA-SI-NEXT: s_lshr_b32 s34, s8, 16 6656; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[48:49], s[20:21], 0x100000 6657; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[50:51], s[18:19], 0x100000 6658; GCN-NOHSA-SI-NEXT: s_lshr_b32 
s52, s6, 16 6659; GCN-NOHSA-SI-NEXT: s_lshr_b32 s54, s4, 16 6660; GCN-NOHSA-SI-NEXT: s_lshr_b32 s56, s2, 16 6661; GCN-NOHSA-SI-NEXT: s_lshr_b32 s58, s0, 16 6662; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[18:19], s[0:1], 0x100000 6663; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[20:21], s[2:3], 0x100000 6664; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[22:23], s[4:5], 0x100000 6665; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[30:31], s[6:7], 0x100000 6666; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[60:61], s[8:9], 0x100000 6667; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[62:63], s[10:11], 0x100000 6668; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[64:65], s[12:13], 0x100000 6669; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[66:67], s[14:15], 0x100000 6670; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[68:69], s[0:1], 48 6671; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[70:71], s[2:3], 48 6672; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[6:7], s[6:7], 48 6673; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[8:9], s[8:9], 48 6674; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[10:11], s[10:11], 48 6675; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[2:3], s[12:13], 48 6676; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[12:13], s[14:15], 48 6677; GCN-NOHSA-SI-NEXT: s_ashr_i64 s[4:5], s[4:5], 48 6678; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s16 6679; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s17 6680; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s50 6681; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s51 6682; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s12 6683; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s13 6684; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s48 6685; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s49 6686; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s2 6687; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s3 6688; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 6689; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 6690; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[12:13], s[46:47], 0x100000 6691; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[14:15], s[44:45], 0x100000 6692; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[16:17], s[40:41], 0x100000 6693; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[36:37], s[36:37], 0x100000 6694; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[40:41], s[42:43], 0x100000 6695; 
GCN-NOHSA-SI-NEXT: s_bfe_i64 s[38:39], s[38:39], 0x100000 6696; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s36 6697; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s37 6698; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s10 6699; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s11 6700; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s16 6701; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s17 6702; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s8 6703; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s9 6704; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v16, s14 6705; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, s15 6706; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v18, s6 6707; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v19, s7 6708; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v20, s12 6709; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v21, s13 6710; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v22, s4 6711; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v23, s5 6712; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240 6713; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[4:5], s[58:59], 0x100000 6714; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[6:7], s[56:57], 0x100000 6715; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[8:9], s[54:55], 0x100000 6716; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[10:11], s[52:53], 0x100000 6717; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[12:13], s[34:35], 0x100000 6718; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[14:15], s[28:29], 0x100000 6719; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[16:17], s[26:27], 0x100000 6720; GCN-NOHSA-SI-NEXT: s_bfe_i64 s[24:25], s[24:25], 0x100000 6721; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:208 6722; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:176 6723; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:144 6724; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:112 6725; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:80 6726; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(5) 6727; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s38 6728; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s39 6729; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s70 
6730; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s71 6731; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 6732; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6733; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s40 6734; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s41 6735; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s68 6736; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s69 6737; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 6738; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6739; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s66 6740; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s67 6741; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v4, s64 6742; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, s65 6743; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, s62 6744; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, s63 6745; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, s60 6746; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, s61 6747; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v16, s30 6748; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, s31 6749; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v20, s22 6750; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v21, s23 6751; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v24, s20 6752; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v25, s21 6753; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s24 6754; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s25 6755; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224 6756; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6757; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, s18 6758; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, s19 6759; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, s16 6760; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, s17 6761; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:192 6762; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v10, s14 6763; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, s15 6764; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:160 6765; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v14, s12 6766; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, s13 6767; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:128 6768; 
GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v18, s10 6769; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v19, s11 6770; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:96 6771; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v22, s8 6772; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v23, s9 6773; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:64 6774; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v26, s6 6775; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v27, s7 6776; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:32 6777; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, s4 6778; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, s5 6779; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 6780; GCN-NOHSA-SI-NEXT: s_endpgm 6781; 6782; GCN-HSA-LABEL: constant_sextload_v32i16_to_v32i64: 6783; GCN-HSA: ; %bb.0: 6784; GCN-HSA-NEXT: s_load_dwordx4 s[16:19], s[4:5], 0x0 6785; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 6786; GCN-HSA-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 6787; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 6788; GCN-HSA-NEXT: s_mov_b32 s42, s15 6789; GCN-HSA-NEXT: s_mov_b32 s44, s13 6790; GCN-HSA-NEXT: s_mov_b32 s46, s11 6791; GCN-HSA-NEXT: s_mov_b32 s48, s9 6792; GCN-HSA-NEXT: s_mov_b32 s50, s7 6793; GCN-HSA-NEXT: s_mov_b32 s52, s5 6794; GCN-HSA-NEXT: s_mov_b32 s54, s3 6795; GCN-HSA-NEXT: s_mov_b32 s56, s1 6796; GCN-HSA-NEXT: s_lshr_b32 s58, s14, 16 6797; GCN-HSA-NEXT: s_lshr_b32 s60, s12, 16 6798; GCN-HSA-NEXT: s_lshr_b32 s62, s10, 16 6799; GCN-HSA-NEXT: s_lshr_b32 s64, s8, 16 6800; GCN-HSA-NEXT: s_lshr_b32 s66, s6, 16 6801; GCN-HSA-NEXT: s_lshr_b32 s68, s4, 16 6802; GCN-HSA-NEXT: s_lshr_b32 s70, s2, 16 6803; GCN-HSA-NEXT: s_lshr_b32 s72, s0, 16 6804; GCN-HSA-NEXT: s_bfe_i64 s[18:19], s[0:1], 0x100000 6805; GCN-HSA-NEXT: s_bfe_i64 s[20:21], s[2:3], 0x100000 6806; GCN-HSA-NEXT: s_ashr_i64 s[36:37], s[0:1], 48 6807; GCN-HSA-NEXT: s_ashr_i64 s[38:39], s[2:3], 48 6808; GCN-HSA-NEXT: s_ashr_i64 s[0:1], s[14:15], 48 6809; GCN-HSA-NEXT: s_bfe_i64 s[2:3], s[42:43], 0x100000 6810; 
GCN-HSA-NEXT: s_bfe_i64 s[22:23], s[4:5], 0x100000 6811; GCN-HSA-NEXT: s_bfe_i64 s[24:25], s[6:7], 0x100000 6812; GCN-HSA-NEXT: s_bfe_i64 s[26:27], s[8:9], 0x100000 6813; GCN-HSA-NEXT: s_bfe_i64 s[28:29], s[10:11], 0x100000 6814; GCN-HSA-NEXT: s_bfe_i64 s[30:31], s[12:13], 0x100000 6815; GCN-HSA-NEXT: s_bfe_i64 s[34:35], s[14:15], 0x100000 6816; GCN-HSA-NEXT: s_ashr_i64 s[40:41], s[4:5], 48 6817; GCN-HSA-NEXT: s_ashr_i64 s[74:75], s[6:7], 48 6818; GCN-HSA-NEXT: s_ashr_i64 s[76:77], s[8:9], 48 6819; GCN-HSA-NEXT: s_ashr_i64 s[78:79], s[10:11], 48 6820; GCN-HSA-NEXT: s_ashr_i64 s[80:81], s[12:13], 48 6821; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 6822; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 6823; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0 6824; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1 6825; GCN-HSA-NEXT: s_bfe_i64 s[0:1], s[72:73], 0x100000 6826; GCN-HSA-NEXT: s_bfe_i64 s[2:3], s[70:71], 0x100000 6827; GCN-HSA-NEXT: s_bfe_i64 s[4:5], s[68:69], 0x100000 6828; GCN-HSA-NEXT: s_bfe_i64 s[6:7], s[66:67], 0x100000 6829; GCN-HSA-NEXT: s_bfe_i64 s[8:9], s[64:65], 0x100000 6830; GCN-HSA-NEXT: s_bfe_i64 s[10:11], s[62:63], 0x100000 6831; GCN-HSA-NEXT: s_bfe_i64 s[12:13], s[60:61], 0x100000 6832; GCN-HSA-NEXT: s_bfe_i64 s[14:15], s[58:59], 0x100000 6833; GCN-HSA-NEXT: s_bfe_i64 s[42:43], s[56:57], 0x100000 6834; GCN-HSA-NEXT: s_bfe_i64 s[54:55], s[54:55], 0x100000 6835; GCN-HSA-NEXT: s_bfe_i64 s[52:53], s[52:53], 0x100000 6836; GCN-HSA-NEXT: s_bfe_i64 s[50:51], s[50:51], 0x100000 6837; GCN-HSA-NEXT: s_bfe_i64 s[48:49], s[48:49], 0x100000 6838; GCN-HSA-NEXT: s_bfe_i64 s[46:47], s[46:47], 0x100000 6839; GCN-HSA-NEXT: s_bfe_i64 s[44:45], s[44:45], 0x100000 6840; GCN-HSA-NEXT: s_add_u32 s56, s16, 0xf0 6841; GCN-HSA-NEXT: s_addc_u32 s57, s17, 0 6842; GCN-HSA-NEXT: v_mov_b32_e32 v4, s44 6843; GCN-HSA-NEXT: s_add_u32 s44, s16, 0xd0 6844; GCN-HSA-NEXT: v_mov_b32_e32 v5, s45 6845; GCN-HSA-NEXT: s_addc_u32 s45, s17, 0 6846; GCN-HSA-NEXT: v_mov_b32_e32 v24, s44 6847; GCN-HSA-NEXT: v_mov_b32_e32 v25, s45 6848; 
GCN-HSA-NEXT: s_add_u32 s44, s16, 0xb0 6849; GCN-HSA-NEXT: s_addc_u32 s45, s17, 0 6850; GCN-HSA-NEXT: v_mov_b32_e32 v26, s44 6851; GCN-HSA-NEXT: v_mov_b32_e32 v27, s45 6852; GCN-HSA-NEXT: s_add_u32 s44, s16, 0x90 6853; GCN-HSA-NEXT: s_addc_u32 s45, s17, 0 6854; GCN-HSA-NEXT: v_mov_b32_e32 v28, s44 6855; GCN-HSA-NEXT: v_mov_b32_e32 v22, s56 6856; GCN-HSA-NEXT: v_mov_b32_e32 v29, s45 6857; GCN-HSA-NEXT: s_add_u32 s44, s16, 0x70 6858; GCN-HSA-NEXT: v_mov_b32_e32 v23, s57 6859; GCN-HSA-NEXT: s_addc_u32 s45, s17, 0 6860; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[0:3] 6861; GCN-HSA-NEXT: v_mov_b32_e32 v22, s40 6862; GCN-HSA-NEXT: s_add_u32 s40, s16, 0x50 6863; GCN-HSA-NEXT: v_mov_b32_e32 v23, s41 6864; GCN-HSA-NEXT: s_addc_u32 s41, s17, 0 6865; GCN-HSA-NEXT: v_mov_b32_e32 v6, s80 6866; GCN-HSA-NEXT: v_mov_b32_e32 v7, s81 6867; GCN-HSA-NEXT: v_mov_b32_e32 v2, s38 6868; GCN-HSA-NEXT: s_add_u32 s38, s16, 48 6869; GCN-HSA-NEXT: v_mov_b32_e32 v3, s39 6870; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[4:7] 6871; GCN-HSA-NEXT: s_addc_u32 s39, s17, 0 6872; GCN-HSA-NEXT: v_mov_b32_e32 v24, s38 6873; GCN-HSA-NEXT: v_mov_b32_e32 v25, s39 6874; GCN-HSA-NEXT: s_add_u32 s38, s16, 16 6875; GCN-HSA-NEXT: v_mov_b32_e32 v8, s46 6876; GCN-HSA-NEXT: v_mov_b32_e32 v9, s47 6877; GCN-HSA-NEXT: v_mov_b32_e32 v10, s78 6878; GCN-HSA-NEXT: v_mov_b32_e32 v11, s79 6879; GCN-HSA-NEXT: s_addc_u32 s39, s17, 0 6880; GCN-HSA-NEXT: flat_store_dwordx4 v[26:27], v[8:11] 6881; GCN-HSA-NEXT: v_mov_b32_e32 v12, s48 6882; GCN-HSA-NEXT: v_mov_b32_e32 v10, s14 6883; GCN-HSA-NEXT: s_add_u32 s14, s16, 0xe0 6884; GCN-HSA-NEXT: v_mov_b32_e32 v13, s49 6885; GCN-HSA-NEXT: v_mov_b32_e32 v14, s76 6886; GCN-HSA-NEXT: v_mov_b32_e32 v15, s77 6887; GCN-HSA-NEXT: v_mov_b32_e32 v11, s15 6888; GCN-HSA-NEXT: s_addc_u32 s15, s17, 0 6889; GCN-HSA-NEXT: v_mov_b32_e32 v30, s44 6890; GCN-HSA-NEXT: flat_store_dwordx4 v[28:29], v[12:15] 6891; GCN-HSA-NEXT: v_mov_b32_e32 v16, s50 6892; GCN-HSA-NEXT: v_mov_b32_e32 v14, s12 6893; 
GCN-HSA-NEXT: s_add_u32 s12, s16, 0xc0 6894; GCN-HSA-NEXT: v_mov_b32_e32 v17, s51 6895; GCN-HSA-NEXT: v_mov_b32_e32 v18, s74 6896; GCN-HSA-NEXT: v_mov_b32_e32 v19, s75 6897; GCN-HSA-NEXT: v_mov_b32_e32 v31, s45 6898; GCN-HSA-NEXT: v_mov_b32_e32 v32, s40 6899; GCN-HSA-NEXT: v_mov_b32_e32 v15, s13 6900; GCN-HSA-NEXT: s_addc_u32 s13, s17, 0 6901; GCN-HSA-NEXT: v_mov_b32_e32 v20, s52 6902; GCN-HSA-NEXT: v_mov_b32_e32 v21, s53 6903; GCN-HSA-NEXT: v_mov_b32_e32 v0, s54 6904; GCN-HSA-NEXT: v_mov_b32_e32 v1, s55 6905; GCN-HSA-NEXT: v_mov_b32_e32 v33, s41 6906; GCN-HSA-NEXT: v_mov_b32_e32 v34, s38 6907; GCN-HSA-NEXT: flat_store_dwordx4 v[30:31], v[16:19] 6908; GCN-HSA-NEXT: v_mov_b32_e32 v4, s42 6909; GCN-HSA-NEXT: v_mov_b32_e32 v17, s15 6910; GCN-HSA-NEXT: v_mov_b32_e32 v19, s13 6911; GCN-HSA-NEXT: v_mov_b32_e32 v5, s43 6912; GCN-HSA-NEXT: v_mov_b32_e32 v35, s39 6913; GCN-HSA-NEXT: v_mov_b32_e32 v6, s36 6914; GCN-HSA-NEXT: v_mov_b32_e32 v7, s37 6915; GCN-HSA-NEXT: v_mov_b32_e32 v8, s34 6916; GCN-HSA-NEXT: v_mov_b32_e32 v9, s35 6917; GCN-HSA-NEXT: v_mov_b32_e32 v12, s30 6918; GCN-HSA-NEXT: v_mov_b32_e32 v13, s31 6919; GCN-HSA-NEXT: v_mov_b32_e32 v16, s14 6920; GCN-HSA-NEXT: flat_store_dwordx4 v[32:33], v[20:23] 6921; GCN-HSA-NEXT: v_mov_b32_e32 v18, s12 6922; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[0:3] 6923; GCN-HSA-NEXT: flat_store_dwordx4 v[34:35], v[4:7] 6924; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[8:11] 6925; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[12:15] 6926; GCN-HSA-NEXT: v_mov_b32_e32 v2, s10 6927; GCN-HSA-NEXT: s_add_u32 s10, s16, 0xa0 6928; GCN-HSA-NEXT: v_mov_b32_e32 v3, s11 6929; GCN-HSA-NEXT: s_addc_u32 s11, s17, 0 6930; GCN-HSA-NEXT: v_mov_b32_e32 v4, s10 6931; GCN-HSA-NEXT: v_mov_b32_e32 v0, s28 6932; GCN-HSA-NEXT: v_mov_b32_e32 v1, s29 6933; GCN-HSA-NEXT: v_mov_b32_e32 v5, s11 6934; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6935; GCN-HSA-NEXT: s_nop 0 6936; GCN-HSA-NEXT: v_mov_b32_e32 v2, s8 6937; GCN-HSA-NEXT: s_add_u32 s8, s16, 0x80 
6938; GCN-HSA-NEXT: v_mov_b32_e32 v3, s9 6939; GCN-HSA-NEXT: s_addc_u32 s9, s17, 0 6940; GCN-HSA-NEXT: v_mov_b32_e32 v4, s8 6941; GCN-HSA-NEXT: v_mov_b32_e32 v0, s26 6942; GCN-HSA-NEXT: v_mov_b32_e32 v1, s27 6943; GCN-HSA-NEXT: v_mov_b32_e32 v5, s9 6944; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6945; GCN-HSA-NEXT: s_nop 0 6946; GCN-HSA-NEXT: v_mov_b32_e32 v2, s6 6947; GCN-HSA-NEXT: s_add_u32 s6, s16, 0x60 6948; GCN-HSA-NEXT: v_mov_b32_e32 v3, s7 6949; GCN-HSA-NEXT: s_addc_u32 s7, s17, 0 6950; GCN-HSA-NEXT: v_mov_b32_e32 v4, s6 6951; GCN-HSA-NEXT: v_mov_b32_e32 v0, s24 6952; GCN-HSA-NEXT: v_mov_b32_e32 v1, s25 6953; GCN-HSA-NEXT: v_mov_b32_e32 v5, s7 6954; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6955; GCN-HSA-NEXT: s_nop 0 6956; GCN-HSA-NEXT: v_mov_b32_e32 v2, s4 6957; GCN-HSA-NEXT: s_add_u32 s4, s16, 64 6958; GCN-HSA-NEXT: v_mov_b32_e32 v3, s5 6959; GCN-HSA-NEXT: s_addc_u32 s5, s17, 0 6960; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 6961; GCN-HSA-NEXT: v_mov_b32_e32 v0, s22 6962; GCN-HSA-NEXT: v_mov_b32_e32 v1, s23 6963; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 6964; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6965; GCN-HSA-NEXT: s_nop 0 6966; GCN-HSA-NEXT: v_mov_b32_e32 v2, s2 6967; GCN-HSA-NEXT: s_add_u32 s2, s16, 32 6968; GCN-HSA-NEXT: v_mov_b32_e32 v3, s3 6969; GCN-HSA-NEXT: s_addc_u32 s3, s17, 0 6970; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 6971; GCN-HSA-NEXT: v_mov_b32_e32 v0, s20 6972; GCN-HSA-NEXT: v_mov_b32_e32 v1, s21 6973; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 6974; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6975; GCN-HSA-NEXT: v_mov_b32_e32 v4, s16 6976; GCN-HSA-NEXT: v_mov_b32_e32 v0, s18 6977; GCN-HSA-NEXT: v_mov_b32_e32 v1, s19 6978; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0 6979; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1 6980; GCN-HSA-NEXT: v_mov_b32_e32 v5, s17 6981; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6982; GCN-HSA-NEXT: s_endpgm 6983; 6984; GCN-NOHSA-VI-LABEL: constant_sextload_v32i16_to_v32i64: 6985; GCN-NOHSA-VI: ; %bb.0: 6986; 
GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[16:19], s[0:1], 0x24 6987; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 6988; GCN-NOHSA-VI-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 6989; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 6990; GCN-NOHSA-VI-NEXT: s_mov_b32 s30, s1 6991; GCN-NOHSA-VI-NEXT: s_lshr_b32 s34, s1, 16 6992; GCN-NOHSA-VI-NEXT: s_lshr_b32 s46, s5, 16 6993; GCN-NOHSA-VI-NEXT: s_lshr_b32 s48, s6, 16 6994; GCN-NOHSA-VI-NEXT: s_lshr_b32 s62, s10, 16 6995; GCN-NOHSA-VI-NEXT: s_mov_b32 s64, s11 6996; GCN-NOHSA-VI-NEXT: s_mov_b32 s80, s15 6997; GCN-NOHSA-VI-NEXT: s_lshr_b32 s82, s15, 16 6998; GCN-NOHSA-VI-NEXT: s_lshr_b32 s28, s0, 16 6999; GCN-NOHSA-VI-NEXT: s_mov_b32 s44, s5 7000; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[24:25], s[6:7], 0x100000 7001; GCN-NOHSA-VI-NEXT: s_mov_b32 s50, s7 7002; GCN-NOHSA-VI-NEXT: s_lshr_b32 s52, s7, 16 7003; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[26:27], s[8:9], 0x100000 7004; GCN-NOHSA-VI-NEXT: s_lshr_b32 s54, s8, 16 7005; GCN-NOHSA-VI-NEXT: s_mov_b32 s56, s9 7006; GCN-NOHSA-VI-NEXT: s_lshr_b32 s58, s9, 16 7007; GCN-NOHSA-VI-NEXT: s_lshr_b32 s78, s14, 16 7008; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[8:9], s[30:31], 0x100000 7009; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[6:7], s[34:35], 0x100000 7010; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[30:31], s[46:47], 0x100000 7011; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[34:35], s[48:49], 0x100000 7012; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[46:47], s[62:63], 0x100000 7013; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[48:49], s[64:65], 0x100000 7014; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[62:63], s[80:81], 0x100000 7015; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[64:65], s[82:83], 0x100000 7016; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[18:19], s[0:1], 0x100000 7017; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[20:21], s[2:3], 0x100000 7018; GCN-NOHSA-VI-NEXT: s_lshr_b32 s36, s2, 16 7019; GCN-NOHSA-VI-NEXT: s_mov_b32 s38, s3 7020; GCN-NOHSA-VI-NEXT: s_lshr_b32 s40, s3, 16 7021; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[22:23], s[4:5], 0x100000 7022; GCN-NOHSA-VI-NEXT: s_lshr_b32 s42, s4, 16 7023; GCN-NOHSA-VI-NEXT: s_mov_b32 
s72, s13 7024; GCN-NOHSA-VI-NEXT: s_lshr_b32 s74, s13, 16 7025; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[76:77], s[14:15], 0x100000 7026; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 7027; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 7028; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s16 7029; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s17 7030; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[4:5], s[28:29], 0x100000 7031; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[28:29], s[44:45], 0x100000 7032; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[44:45], s[58:59], 0x100000 7033; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[58:59], s[78:79], 0x100000 7034; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s62 7035; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s63 7036; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s64 7037; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s65 7038; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[68:69], s[12:13], 0x100000 7039; GCN-NOHSA-VI-NEXT: s_lshr_b32 s70, s12, 16 7040; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[12:13], s[40:41], 0x100000 7041; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[16:17], s[42:43], 0x100000 7042; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[40:41], s[54:55], 0x100000 7043; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[42:43], s[56:57], 0x100000 7044; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[54:55], s[72:73], 0x100000 7045; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[56:57], s[74:75], 0x100000 7046; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240 7047; GCN-NOHSA-VI-NEXT: s_lshr_b32 s66, s11, 16 7048; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s76 7049; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s77 7050; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s58 7051; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s59 7052; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[14:15], s[38:39], 0x100000 7053; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[38:39], s[52:53], 0x100000 7054; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[52:53], s[70:71], 0x100000 7055; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224 7056; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[60:61], s[10:11], 0x100000 7057; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s54 7058; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s55 7059; 
GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s56 7060; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s57 7061; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[10:11], s[36:37], 0x100000 7062; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[36:37], s[50:51], 0x100000 7063; GCN-NOHSA-VI-NEXT: s_bfe_i64 s[50:51], s[66:67], 0x100000 7064; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:208 7065; GCN-NOHSA-VI-NEXT: s_nop 0 7066; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s68 7067; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s69 7068; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s52 7069; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s53 7070; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:192 7071; GCN-NOHSA-VI-NEXT: s_nop 0 7072; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s48 7073; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s49 7074; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s50 7075; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s51 7076; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:176 7077; GCN-NOHSA-VI-NEXT: s_nop 0 7078; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s60 7079; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s61 7080; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s46 7081; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s47 7082; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:160 7083; GCN-NOHSA-VI-NEXT: s_nop 0 7084; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s42 7085; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s43 7086; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s44 7087; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s45 7088; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:144 7089; GCN-NOHSA-VI-NEXT: s_nop 0 7090; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s26 7091; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s27 7092; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s40 7093; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s41 7094; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:128 7095; GCN-NOHSA-VI-NEXT: s_nop 0 7096; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s36 7097; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s37 7098; 
GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s38 7099; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s39 7100; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:112 7101; GCN-NOHSA-VI-NEXT: s_nop 0 7102; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s24 7103; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s25 7104; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s34 7105; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s35 7106; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:96 7107; GCN-NOHSA-VI-NEXT: s_nop 0 7108; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s28 7109; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s29 7110; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s30 7111; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s31 7112; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:80 7113; GCN-NOHSA-VI-NEXT: s_nop 0 7114; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s22 7115; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s23 7116; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s16 7117; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s17 7118; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:64 7119; GCN-NOHSA-VI-NEXT: s_nop 0 7120; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s14 7121; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s15 7122; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s12 7123; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s13 7124; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 7125; GCN-NOHSA-VI-NEXT: s_nop 0 7126; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s20 7127; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s21 7128; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s10 7129; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s11 7130; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 7131; GCN-NOHSA-VI-NEXT: s_nop 0 7132; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s8 7133; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s9 7134; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s6 7135; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s7 7136; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 7137; GCN-NOHSA-VI-NEXT: s_nop 0 
7138; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v0, s18 7139; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, s19 7140; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v2, s4 7141; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, s5 7142; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 7143; GCN-NOHSA-VI-NEXT: s_endpgm 7144; 7145; EG-LABEL: constant_sextload_v32i16_to_v32i64: 7146; EG: ; %bb.0: 7147; EG-NEXT: ALU 0, @30, KC0[CB0:0-32], KC1[] 7148; EG-NEXT: TEX 0 @22 7149; EG-NEXT: ALU 55, @31, KC0[CB0:0-32], KC1[] 7150; EG-NEXT: TEX 2 @24 7151; EG-NEXT: ALU 74, @87, KC0[CB0:0-32], KC1[] 7152; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T50.XYZW, T38.X, 0 7153; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T49.XYZW, T36.X, 0 7154; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T34.X, 0 7155; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T48.XYZW, T33.X, 0 7156; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T32.X, 0 7157; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T31.X, 0 7158; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T30.X, 0 7159; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T29.X, 0 7160; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T28.X, 0 7161; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T27.X, 0 7162; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T26.X, 0 7163; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T25.X, 0 7164; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T41.XYZW, T24.X, 0 7165; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T23.X, 0 7166; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T22.X, 0 7167; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T21.X, 1 7168; EG-NEXT: CF_END 7169; EG-NEXT: Fetch clause starting at 22: 7170; EG-NEXT: VTX_READ_128 T20.XYZW, T19.X, 0, #1 7171; EG-NEXT: Fetch clause starting at 24: 7172; EG-NEXT: VTX_READ_128 T38.XYZW, T19.X, 48, #1 7173; EG-NEXT: VTX_READ_128 T39.XYZW, T19.X, 32, #1 7174; EG-NEXT: VTX_READ_128 T40.XYZW, T19.X, 16, #1 7175; EG-NEXT: ALU clause starting at 30: 7176; EG-NEXT: MOV * T19.X, KC0[2].Z, 7177; EG-NEXT: ALU clause starting at 31: 7178; 
EG-NEXT: LSHR T21.X, KC0[2].Y, literal.x, 7179; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7180; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 7181; EG-NEXT: LSHR T22.X, PV.W, literal.x, 7182; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7183; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 7184; EG-NEXT: LSHR T23.X, PV.W, literal.x, 7185; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7186; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 7187; EG-NEXT: LSHR T24.X, PV.W, literal.x, 7188; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7189; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 7190; EG-NEXT: LSHR T25.X, PV.W, literal.x, 7191; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7192; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 7193; EG-NEXT: LSHR T26.X, PV.W, literal.x, 7194; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7195; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 7196; EG-NEXT: LSHR T27.X, PV.W, literal.x, 7197; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7198; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) 7199; EG-NEXT: LSHR T28.X, PV.W, literal.x, 7200; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7201; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43) 7202; EG-NEXT: LSHR T29.X, PV.W, literal.x, 7203; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7204; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43) 7205; EG-NEXT: LSHR T30.X, PV.W, literal.x, 7206; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7207; EG-NEXT: 2(2.802597e-45), 160(2.242078e-43) 7208; EG-NEXT: LSHR T31.X, PV.W, literal.x, 7209; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7210; EG-NEXT: 2(2.802597e-45), 176(2.466285e-43) 7211; EG-NEXT: LSHR T32.X, PV.W, literal.x, 7212; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7213; EG-NEXT: 2(2.802597e-45), 192(2.690493e-43) 7214; EG-NEXT: LSHR T33.X, PV.W, literal.x, 7215; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7216; EG-NEXT: 2(2.802597e-45), 208(2.914701e-43) 7217; EG-NEXT: LSHR T34.X, PV.W, literal.x, 7218; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.y, 7219; EG-NEXT: 
ASHR * T35.W, T20.X, literal.z, 7220; EG-NEXT: 2(2.802597e-45), 224(3.138909e-43) 7221; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 7222; EG-NEXT: LSHR T36.X, PV.W, literal.x, 7223; EG-NEXT: ASHR T35.Z, T20.X, literal.y, 7224; EG-NEXT: ASHR * T37.W, T20.Y, literal.z, 7225; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 7226; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 7227; EG-NEXT: BFE_INT T35.X, T20.X, 0.0, literal.x, 7228; EG-NEXT: ASHR * T37.Z, T20.Y, literal.x, 7229; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7230; EG-NEXT: BFE_INT T37.X, T20.Y, 0.0, literal.x, 7231; EG-NEXT: ASHR T35.Y, PV.X, literal.y, 7232; EG-NEXT: ASHR * T19.W, T20.Z, literal.y, 7233; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7234; EG-NEXT: ALU clause starting at 87: 7235; EG-NEXT: ASHR T19.Z, T20.Z, literal.x, 7236; EG-NEXT: ASHR * T41.W, T20.W, literal.y, 7237; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7238; EG-NEXT: BFE_INT T19.X, T20.Z, 0.0, literal.x, 7239; EG-NEXT: ASHR T37.Y, T37.X, literal.y, 7240; EG-NEXT: ASHR T41.Z, T20.W, literal.x, 7241; EG-NEXT: ASHR * T42.W, T40.X, literal.y, BS:VEC_120/SCL_212 7242; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7243; EG-NEXT: BFE_INT T41.X, T20.W, 0.0, literal.x, 7244; EG-NEXT: ASHR T19.Y, PV.X, literal.y, 7245; EG-NEXT: ASHR T42.Z, T40.X, literal.x, 7246; EG-NEXT: ASHR * T20.W, T40.Y, literal.y, 7247; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7248; EG-NEXT: BFE_INT T42.X, T40.X, 0.0, literal.x, 7249; EG-NEXT: ASHR T41.Y, PV.X, literal.y, 7250; EG-NEXT: ASHR T20.Z, T40.Y, literal.x, 7251; EG-NEXT: ASHR * T43.W, T40.Z, literal.y, 7252; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7253; EG-NEXT: BFE_INT T20.X, T40.Y, 0.0, literal.x, 7254; EG-NEXT: ASHR T42.Y, PV.X, literal.y, 7255; EG-NEXT: ASHR T43.Z, T40.Z, literal.x, 7256; EG-NEXT: ASHR * T44.W, T40.W, literal.y, 7257; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7258; EG-NEXT: BFE_INT T43.X, T40.Z, 0.0, literal.x, 7259; EG-NEXT: ASHR T20.Y, PV.X, literal.y, 7260; EG-NEXT: ASHR T44.Z, T40.W, 
literal.x, 7261; EG-NEXT: ASHR * T45.W, T39.X, literal.y, 7262; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7263; EG-NEXT: BFE_INT T44.X, T40.W, 0.0, literal.x, 7264; EG-NEXT: ASHR T43.Y, PV.X, literal.y, 7265; EG-NEXT: ASHR T45.Z, T39.X, literal.x, 7266; EG-NEXT: ASHR * T40.W, T39.Y, literal.y, 7267; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7268; EG-NEXT: BFE_INT T45.X, T39.X, 0.0, literal.x, 7269; EG-NEXT: ASHR T44.Y, PV.X, literal.y, 7270; EG-NEXT: ASHR T40.Z, T39.Y, literal.x, 7271; EG-NEXT: ASHR * T46.W, T39.Z, literal.y, 7272; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7273; EG-NEXT: BFE_INT T40.X, T39.Y, 0.0, literal.x, 7274; EG-NEXT: ASHR T45.Y, PV.X, literal.y, 7275; EG-NEXT: ASHR T46.Z, T39.Z, literal.x, 7276; EG-NEXT: ASHR * T47.W, T39.W, literal.y, 7277; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7278; EG-NEXT: BFE_INT T46.X, T39.Z, 0.0, literal.x, 7279; EG-NEXT: ASHR T40.Y, PV.X, literal.y, 7280; EG-NEXT: ASHR T47.Z, T39.W, literal.x, 7281; EG-NEXT: ASHR * T48.W, T38.X, literal.y, 7282; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7283; EG-NEXT: BFE_INT T47.X, T39.W, 0.0, literal.x, 7284; EG-NEXT: ASHR T46.Y, PV.X, literal.y, 7285; EG-NEXT: ASHR T48.Z, T38.X, literal.x, 7286; EG-NEXT: ASHR * T39.W, T38.Y, literal.y, 7287; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7288; EG-NEXT: BFE_INT T48.X, T38.X, 0.0, literal.x, 7289; EG-NEXT: ASHR T47.Y, PV.X, literal.y, 7290; EG-NEXT: ASHR T39.Z, T38.Y, literal.x, 7291; EG-NEXT: ASHR * T49.W, T38.Z, literal.y, 7292; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7293; EG-NEXT: BFE_INT T39.X, T38.Y, 0.0, literal.x, 7294; EG-NEXT: ASHR T48.Y, PV.X, literal.y, 7295; EG-NEXT: ASHR T49.Z, T38.Z, literal.x, 7296; EG-NEXT: ASHR * T50.W, T38.W, literal.y, 7297; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7298; EG-NEXT: BFE_INT T49.X, T38.Z, 0.0, literal.x, 7299; EG-NEXT: ASHR T39.Y, PV.X, literal.y, 7300; EG-NEXT: ASHR * T50.Z, T38.W, literal.x, 7301; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7302; EG-NEXT: BFE_INT 
T50.X, T38.W, 0.0, literal.x, 7303; EG-NEXT: ASHR T49.Y, PV.X, literal.y, 7304; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 7305; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7306; EG-NEXT: 240(3.363116e-43), 0(0.000000e+00) 7307; EG-NEXT: LSHR T38.X, PV.W, literal.x, 7308; EG-NEXT: ASHR * T50.Y, PV.X, literal.y, 7309; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) 7310 %load = load <32 x i16>, <32 x i16> addrspace(4)* %in 7311 %ext = sext <32 x i16> %load to <32 x i64> 7312 store <32 x i64> %ext, <32 x i64> addrspace(1)* %out 7313 ret void 7314} 7315 7316; FIXME: The v64i16 to v64i64 extload tests below are disabled because they trigger undefined register machine verifier errors. 7317 7318; define amdgpu_kernel void @constant_zextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 { 7319; %load = load <64 x i16>, <64 x i16> addrspace(4)* %in 7320; %ext = zext <64 x i16> %load to <64 x i64> 7321; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out 7322; ret void 7323; } 7324 7325; define amdgpu_kernel void @constant_sextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(4)* %in) #0 { 7326; %load = load <64 x i16>, <64 x i16> addrspace(4)* %in 7327; %ext = sext <64 x i16> %load to <64 x i64> 7328; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out 7329; ret void 7330; } 7331 7332attributes #0 = { nounwind } 7333