; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefix=GCN-NOHSA-SI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefix=GCN-HSA %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefix=GCN-NOHSA-VI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefix=EG %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap --check-prefix=CM %s

; Each kernel below loads an i16 scalar or an i16 vector from an addrspace(1)
; pointer and stores the result to a second addrspace(1) pointer. The expected
; instruction sequence is pinned once per llc invocation listed above.
; The check lines are autogenerated; regenerate them with the script named in
; the first line instead of editing them by hand.

; FIXME: r600 is broken because the bigger testcases spill and it's not implemented

; Scalar i16 load from %in, stored unchanged to %out.
define amdgpu_kernel void @global_load_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
; GCN-NOHSA-SI-LABEL: global_load_i16:
; GCN-NOHSA-SI:       ; %bb.0: ; %entry
; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
; GCN-NOHSA-SI-NEXT:    buffer_store_short v0, off, s[4:7], 0
; GCN-NOHSA-SI-NEXT:    s_endpgm
;
; GCN-HSA-LABEL: global_load_i16:
; GCN-HSA:       ; %bb.0: ; %entry
; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
; GCN-HSA-NEXT:    flat_load_ushort v2, v[0:1]
; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
; GCN-HSA-NEXT:    flat_store_short v[0:1], v2
; GCN-HSA-NEXT:    s_endpgm
;
; GCN-NOHSA-VI-LABEL: global_load_i16:
; GCN-NOHSA-VI:       ; %bb.0: ; %entry
; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
; GCN-NOHSA-VI-NEXT:    buffer_store_short v0, off, s[4:7], 0
; GCN-NOHSA-VI-NEXT:    s_endpgm
;
; EG-LABEL: global_load_i16:
; EG:       ; %bb.0: ; %entry
; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 11, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:    VTX_READ_16 T0.X, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:    MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:    AND_INT T0.W, KC0[2].Y, literal.x,
; EG-NEXT:    AND_INT * T1.W, T0.X, literal.y,
; EG-NEXT:    3(4.203895e-45), 65535(9.183409e-41)
; EG-NEXT:    LSHL * T0.W, PV.W, literal.x,
; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT:    LSHL T0.X, T1.W, PV.W,
; EG-NEXT:    LSHL * T0.W, literal.x, PV.W,
; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
; EG-NEXT:    MOV T0.Y, 0.0,
; EG-NEXT:    MOV * T0.Z, 0.0,
; EG-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: global_load_i16:
; CM:       ; %bb.0: ; %entry
; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; CM-NEXT:    TEX 0 @6
; CM-NEXT:    ALU 11, @9, KC0[CB0:0-32], KC1[]
; CM-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
; CM-NEXT:    CF_END
; CM-NEXT:    PAD
; CM-NEXT:    Fetch clause starting at 6:
; CM-NEXT:    VTX_READ_16 T0.X, T0.X, 0, #1
; CM-NEXT:    ALU clause starting at 8:
; CM-NEXT:    MOV * T0.X, KC0[2].Z,
; CM-NEXT:    ALU clause starting at 9:
; CM-NEXT:    AND_INT * T0.W, KC0[2].Y, literal.x,
; CM-NEXT:    3(4.203895e-45), 0(0.000000e+00)
; CM-NEXT:    AND_INT T0.Z, T0.X, literal.x,
; CM-NEXT:    LSHL * T0.W, PV.W, literal.y,
; CM-NEXT:    65535(9.183409e-41), 3(4.203895e-45)
; CM-NEXT:    LSHL T0.X, PV.Z, PV.W,
; CM-NEXT:    LSHL * T0.W, literal.x, PV.W,
; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
; CM-NEXT:    MOV T0.Y, 0.0,
; CM-NEXT:    MOV * T0.Z, 0.0,
; CM-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
entry:
  %ld = load i16, i16 addrspace(1)* %in
  store i16 %ld, i16 addrspace(1)* %out
  ret void
}

; <2 x i16> load from %in, stored unchanged to %out.
define amdgpu_kernel void @global_load_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
; GCN-NOHSA-SI-LABEL: global_load_v2i16:
; GCN-NOHSA-SI:       ; %bb.0: ; %entry
; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
; GCN-NOHSA-SI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GCN-NOHSA-SI-NEXT:    s_endpgm
;
; GCN-HSA-LABEL: global_load_v2i16:
; GCN-HSA:       ; %bb.0: ; %entry
; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
; GCN-HSA-NEXT:    flat_load_dword v2, v[0:1]
; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
; GCN-HSA-NEXT:    flat_store_dword v[0:1], v2
; GCN-HSA-NEXT:    s_endpgm
;
; GCN-NOHSA-VI-LABEL: global_load_v2i16:
; GCN-NOHSA-VI:       ; %bb.0: ; %entry
; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
; GCN-NOHSA-VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
; GCN-NOHSA-VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GCN-NOHSA-VI-NEXT:    s_endpgm
;
; EG-LABEL: global_load_v2i16:
; EG:       ; %bb.0: ; %entry
; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:    VTX_READ_32 T0.X, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:    MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: global_load_v2i16:
; CM:       ; %bb.0: ; %entry
; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; CM-NEXT:    TEX 0 @6
; CM-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
; CM-NEXT:    CF_END
; CM-NEXT:    PAD
; CM-NEXT:    Fetch clause starting at 6:
; CM-NEXT:    VTX_READ_32 T0.X, T0.X, 0, #1
; CM-NEXT:    ALU clause starting at 8:
; CM-NEXT:    MOV * T0.X, KC0[2].Z,
; CM-NEXT:    ALU clause starting at 9:
; CM-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
entry:
  %ld = load <2 x i16>, <2 x i16> addrspace(1)* %in
  store <2 x i16> %ld, <2 x i16> addrspace(1)* %out
  ret void
}

; <3 x i16> load from %in, stored unchanged to %out.
define amdgpu_kernel void @global_load_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) {
; GCN-NOHSA-SI-LABEL: global_load_v3i16:
; GCN-NOHSA-SI:       ; %bb.0: ; %entry
; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
; GCN-NOHSA-SI-NEXT:    buffer_store_short v1, off, s[4:7], 0 offset:4
; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GCN-NOHSA-SI-NEXT:    s_endpgm
;
; GCN-HSA-LABEL: global_load_v3i16:
; GCN-HSA:       ; %bb.0: ; %entry
; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
; GCN-HSA-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; GCN-HSA-NEXT:    s_add_u32 s2, s0, 4
; GCN-HSA-NEXT:    s_addc_u32 s3, s1, 0
; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s1
; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s0
; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
; GCN-HSA-NEXT:    flat_store_short v[4:5], v1
; GCN-HSA-NEXT:    flat_store_dword v[2:3], v0
; GCN-HSA-NEXT:    s_endpgm
;
; GCN-NOHSA-VI-LABEL: global_load_v3i16:
; GCN-NOHSA-VI:       ; %bb.0: ; %entry
; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
; GCN-NOHSA-VI-NEXT:    buffer_store_short v1, off, s[4:7], 0 offset:4
; GCN-NOHSA-VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GCN-NOHSA-VI-NEXT:    s_endpgm
;
; EG-LABEL: global_load_v3i16:
; EG:       ; %bb.0: ; %entry
; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 2 @6
; EG-NEXT:    ALU 19, @13, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T6.X, T7.X, 0
; EG-NEXT:    MEM_RAT MSKOR T5.XW, T8.X
; EG-NEXT:    CF_END
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:    VTX_READ_16 T6.X, T5.X, 0, #1
; EG-NEXT:    VTX_READ_16 T7.X, T5.X, 2, #1
; EG-NEXT:    VTX_READ_16 T5.X, T5.X, 4, #1
; EG-NEXT:    ALU clause starting at 12:
; EG-NEXT:    MOV * T5.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 13:
; EG-NEXT:    ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT:    4(5.605194e-45), 0(0.000000e+00)
; EG-NEXT:    AND_INT T1.W, PV.W, literal.x,
; EG-NEXT:    AND_INT * T2.W, T5.X, literal.y,
; EG-NEXT:    3(4.203895e-45), 65535(9.183409e-41)
; EG-NEXT:    LSHL * T1.W, PV.W, literal.x,
; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT:    LSHL T5.X, T2.W, PV.W,
; EG-NEXT:    LSHL * T5.W, literal.x, PV.W,
; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
; EG-NEXT:    MOV T5.Y, 0.0,
; EG-NEXT:    MOV * T5.Z, 0.0,
; EG-NEXT:    LSHR T8.X, T0.W, literal.x,
; EG-NEXT:    LSHL T0.W, T7.X, literal.y,
; EG-NEXT:    AND_INT * T1.W, T6.X, literal.z,
; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
; EG-NEXT:    OR_INT T6.X, PV.W, PS,
; EG-NEXT:    LSHR * T7.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: global_load_v3i16:
; CM:       ; %bb.0: ; %entry
; CM-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
; CM-NEXT:    TEX 2 @6
; CM-NEXT:    ALU 19, @13, KC0[CB0:0-32], KC1[]
; CM-NEXT:    MEM_RAT MSKOR T5.XW, T8.X
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T6.X, T7.X
; CM-NEXT:    CF_END
; CM-NEXT:    Fetch clause starting at 6:
; CM-NEXT:    VTX_READ_16 T6.X, T5.X, 0, #1
; CM-NEXT:    VTX_READ_16 T7.X, T5.X, 2, #1
; CM-NEXT:    VTX_READ_16 T5.X, T5.X, 4, #1
; CM-NEXT:    ALU clause starting at 12:
; CM-NEXT:    MOV * T5.X, KC0[2].Z,
; CM-NEXT:    ALU clause starting at 13:
; CM-NEXT:    ADD_INT * T0.W, KC0[2].Y, literal.x,
; CM-NEXT:    4(5.605194e-45), 0(0.000000e+00)
; CM-NEXT:    AND_INT * T1.W, PV.W, literal.x,
; CM-NEXT:    3(4.203895e-45), 0(0.000000e+00)
; CM-NEXT:    AND_INT T0.Z, T5.X, literal.x,
; CM-NEXT:    LSHL * T1.W, PV.W, literal.y,
; CM-NEXT:    65535(9.183409e-41), 3(4.203895e-45)
; CM-NEXT:    LSHL T5.X, PV.Z, PV.W,
; CM-NEXT:    LSHL * T5.W, literal.x, PV.W,
; CM-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
; CM-NEXT:    MOV T5.Y, 0.0,
; CM-NEXT:    MOV * T5.Z, 0.0,
; CM-NEXT:    LSHL T0.Z, T7.X, literal.x,
; CM-NEXT:    AND_INT * T1.W, T6.X, literal.y, BS:VEC_120/SCL_212
; CM-NEXT:    16(2.242078e-44), 65535(9.183409e-41)
; CM-NEXT:    OR_INT * T6.X, PV.Z, PV.W,
; CM-NEXT:    LSHR * T7.X, KC0[2].Y, literal.x,
; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; CM-NEXT:    LSHR * T8.X, T0.W, literal.x,
; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
entry:
  %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in
  store <3 x i16> %ld, <3 x i16> addrspace(1)* %out
  ret void
}

; <4 x i16> load from %in, stored unchanged to %out.
define amdgpu_kernel void @global_load_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
; GCN-NOHSA-SI-LABEL: global_load_v4i16:
; GCN-NOHSA-SI:       ; %bb.0: ; %entry
; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GCN-NOHSA-SI-NEXT:    s_endpgm
;
; GCN-HSA-LABEL: global_load_v4i16:
; GCN-HSA:       ; %bb.0: ; %entry
; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
; GCN-HSA-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; GCN-HSA-NEXT:    v_mov_b32_e32 v2, s0
; GCN-HSA-NEXT:    v_mov_b32_e32 v3, s1
; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
; GCN-HSA-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; GCN-HSA-NEXT:    s_endpgm
;
; GCN-NOHSA-VI-LABEL: global_load_v4i16:
; GCN-NOHSA-VI:       ; %bb.0: ; %entry
; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx2 v[0:1], off, s[8:11], 0
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GCN-NOHSA-VI-NEXT:    s_endpgm
;
; EG-LABEL: global_load_v4i16:
; EG:       ; %bb.0: ; %entry
; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:    VTX_READ_64 T0.XY, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:    MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: global_load_v4i16:
; CM:       ; %bb.0: ; %entry
; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; CM-NEXT:    TEX 0 @6
; CM-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
; CM-NEXT:    CF_END
; CM-NEXT:    PAD
; CM-NEXT:    Fetch clause starting at 6:
; CM-NEXT:    VTX_READ_64 T0.XY, T0.X, 0, #1
; CM-NEXT:    ALU clause starting at 8:
; CM-NEXT:    MOV * T0.X, KC0[2].Z,
; CM-NEXT:    ALU clause starting at 9:
; CM-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
entry:
  %ld = load <4 x i16>, <4 x i16> addrspace(1)* %in
  store <4 x i16> %ld, <4 x i16> addrspace(1)* %out
  ret void
}

; <8 x i16> load from %in, stored unchanged to %out.
define amdgpu_kernel void @global_load_v8i16(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) {
; GCN-NOHSA-SI-LABEL: global_load_v8i16:
; GCN-NOHSA-SI:       ; %bb.0: ; %entry
; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GCN-NOHSA-SI-NEXT:    s_endpgm
;
; GCN-HSA-LABEL: global_load_v8i16:
; GCN-HSA:       ; %bb.0: ; %entry
; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
; GCN-HSA-NEXT:    s_waitcnt vmcnt(0)
; GCN-HSA-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
; GCN-HSA-NEXT:    s_endpgm
;
; GCN-NOHSA-VI-LABEL: global_load_v8i16:
; GCN-NOHSA-VI:       ; %bb.0: ; %entry
; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GCN-NOHSA-VI-NEXT:    s_endpgm
;
; EG-LABEL: global_load_v8i16:
; EG:       ; %bb.0: ; %entry
; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:    VTX_READ_128 T0.XYZW, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:    MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: global_load_v8i16:
; CM:       ; %bb.0: ; %entry
; CM-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; CM-NEXT:    TEX 0 @6
; CM-NEXT:    ALU 1, @9, KC0[CB0:0-32], KC1[]
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
; CM-NEXT:    CF_END
; CM-NEXT:    PAD
; CM-NEXT:    Fetch clause starting at 6:
; CM-NEXT:    VTX_READ_128 T0.XYZW, T0.X, 0, #1
; CM-NEXT:    ALU clause starting at 8:
; CM-NEXT:    MOV * T0.X, KC0[2].Z,
; CM-NEXT:    ALU clause starting at 9:
; CM-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
entry:
  %ld = load <8 x i16>, <8 x i16> addrspace(1)* %in
  store <8 x i16> %ld, <8 x i16> addrspace(1)* %out
  ret void
}

; <16 x i16> load from %in, stored unchanged to %out.
define amdgpu_kernel void @global_load_v16i16(<16 x i16> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) {
; GCN-NOHSA-SI-LABEL: global_load_v16i16:
; GCN-NOHSA-SI:       ; %bb.0: ; %entry
; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:16
; GCN-NOHSA-SI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(1)
; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16
; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(1)
; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; GCN-NOHSA-SI-NEXT:    s_endpgm
;
; GCN-HSA-LABEL: global_load_v16i16:
; GCN-HSA:       ; %bb.0: ; %entry
; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT:    s_add_u32 s4, s0, 16
; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s3
; GCN-HSA-NEXT:    s_addc_u32 s5, s1, 0
; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s2
; GCN-HSA-NEXT:    s_add_u32 s2, s2, 16
; GCN-HSA-NEXT:    s_addc_u32 s3, s3, 0
; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; GCN-HSA-NEXT:    flat_load_dwordx4 v[4:7], v[4:5]
; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s5
; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s1
; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s4
; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s0
; GCN-HSA-NEXT:    s_waitcnt vmcnt(1)
; GCN-HSA-NEXT:    flat_store_dwordx4 v[10:11], v[0:3]
; GCN-HSA-NEXT:    s_waitcnt vmcnt(1)
; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[4:7]
; GCN-HSA-NEXT:    s_endpgm
;
; GCN-NOHSA-VI-LABEL: global_load_v16i16:
; GCN-NOHSA-VI:       ; %bb.0: ; %entry
; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s10, s6
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s11, s7
; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:16
; GCN-NOHSA-VI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(1)
; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16
; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(1)
; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0
; GCN-NOHSA-VI-NEXT:    s_endpgm
;
; EG-LABEL: global_load_v16i16:
; EG:       ; %bb.0: ; %entry
; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @8
; EG-NEXT:    ALU 1, @13, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
; EG-NEXT:    TEX 0 @10
; EG-NEXT:    ALU 3, @15, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    Fetch clause starting at 8:
; EG-NEXT:    VTX_READ_128 T1.XYZW, T0.X, 0, #1
; EG-NEXT:    Fetch clause starting at 10:
; EG-NEXT:    VTX_READ_128 T0.XYZW, T0.X, 16, #1
; EG-NEXT:    ALU clause starting at 12:
; EG-NEXT:    MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 13:
; EG-NEXT:    LSHR * T2.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT:    ALU clause starting at 15:
; EG-NEXT:    ADD_INT * T1.W, KC0[2].Y, literal.x,
; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT:    LSHR * T1.X, PV.W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: global_load_v16i16:
; CM:       ; %bb.0: ; %entry
; CM-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
; CM-NEXT:    TEX 0 @8
; CM-NEXT:    ALU 1, @13, KC0[CB0:0-32], KC1[]
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T1, T2.X
; CM-NEXT:    TEX 0 @10
; CM-NEXT:    ALU 3, @15, KC0[CB0:0-32], KC1[]
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
; CM-NEXT:    CF_END
; CM-NEXT:    Fetch clause starting at 8:
; CM-NEXT:    VTX_READ_128 T1.XYZW, T0.X, 0, #1
; CM-NEXT:    Fetch clause starting at 10:
; CM-NEXT:    VTX_READ_128 T0.XYZW, T0.X, 16, #1
; CM-NEXT:    ALU clause starting at 12:
; CM-NEXT:    MOV * T0.X, KC0[2].Z,
; CM-NEXT:    ALU clause starting at 13:
; CM-NEXT:    LSHR * T2.X, KC0[2].Y, literal.x,
; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; CM-NEXT:    ALU clause starting at 15:
; CM-NEXT:    ADD_INT * T1.W, KC0[2].Y, literal.x,
; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
; CM-NEXT:    LSHR * T1.X, PV.W, literal.x,
; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
entry:
  %ld = load <16 x i16>, <16 x i16> addrspace(1)* %in
  store <16 x i16> %ld, <16 x i16> addrspace(1)* %out
  ret void
}

; <16 x i16> load with only 2-byte alignment; the store is 32-byte aligned.
; The parameter order here is (%in, %out), the reverse of the kernels above,
; so the first kernel argument is the load address.
define amdgpu_kernel void @global_load_v16i16_align2(<16 x i16> addrspace(1)* %in, <16 x i16> addrspace(1)* %out) #0 {
; GCN-NOHSA-SI-LABEL: global_load_v16i16_align2:
; GCN-NOHSA-SI:       ; %bb.0: ; %entry
; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v1, off, s[4:7], 0 offset:2
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v4, off, s[4:7], 0 offset:4
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v2, off, s[4:7], 0 offset:6
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v5, off, s[4:7], 0 offset:8
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v3, off, s[4:7], 0 offset:10
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v6, off, s[4:7], 0 offset:12
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v7, off, s[4:7], 0 offset:14
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v8, off, s[4:7], 0 offset:16
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v9, off, s[4:7], 0 offset:18
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v10, off, s[4:7], 0 offset:20
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v11, off, s[4:7], 0 offset:22
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v12, off, s[4:7], 0 offset:24
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v13, off, s[4:7], 0 offset:26
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v14, off, s[4:7], 0 offset:28
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v15, off, s[4:7], 0 offset:30
; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(8)
; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v16, 16, v3
; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v17, 16, v2
; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v18, 16, v1
; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v15, 16, v15
; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v13, 16, v13
; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v11, 16, v11
; GCN-NOHSA-SI-NEXT:    v_lshlrev_b32_e32 v9, 16, v9
; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v3, v7, v6
; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v2, v16, v5
; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v1, v17, v4
; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v0, v18, v0
; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v7, v15, v14
; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v6, v13, v12
; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v5, v11, v10
; GCN-NOHSA-SI-NEXT:    v_or_b32_e32 v4, v9, v8
; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[8:11], 0 offset:16
; GCN-NOHSA-SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
; GCN-NOHSA-SI-NEXT:    s_endpgm
;
; GCN-HSA-LABEL: global_load_v16i16_align2:
; GCN-HSA:       ; %bb.0: ; %entry
; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT:    v_mov_b32_e32 v5, s1
; GCN-HSA-NEXT:    v_mov_b32_e32 v4, s0
; GCN-HSA-NEXT:    s_add_u32 s0, s0, 16
; GCN-HSA-NEXT:    s_addc_u32 s1, s1, 0
; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s1
; GCN-HSA-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
; GCN-HSA-NEXT:    flat_load_dwordx4 v[4:7], v[4:5]
; GCN-HSA-NEXT:    s_add_u32 s0, s2, 16
; GCN-HSA-NEXT:    s_addc_u32 s1, s3, 0
; GCN-HSA-NEXT:    v_mov_b32_e32 v11, s1
; GCN-HSA-NEXT:    v_mov_b32_e32 v9, s3
; GCN-HSA-NEXT:    v_mov_b32_e32 v10, s0
; GCN-HSA-NEXT:    v_mov_b32_e32 v8, s2
; GCN-HSA-NEXT:    s_waitcnt vmcnt(1)
; GCN-HSA-NEXT:    flat_store_dwordx4 v[10:11], v[0:3]
; GCN-HSA-NEXT:    s_waitcnt vmcnt(1)
; GCN-HSA-NEXT:    flat_store_dwordx4 v[8:9], v[4:7]
; GCN-HSA-NEXT:    s_endpgm
;
; GCN-NOHSA-VI-LABEL: global_load_v16i16_align2:
; GCN-NOHSA-VI:       ; %bb.0: ; %entry
; GCN-NOHSA-VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s6, -1
; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v0, off, s[4:7], 0 offset:14
; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v1, off, s[4:7], 0 offset:10
; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v2, off, s[4:7], 0 offset:6
; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v3, off, s[4:7], 0 offset:2
; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v4, off, s[4:7], 0 offset:30
; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v5, off, s[4:7], 0 offset:26
; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v6, off, s[4:7], 0 offset:22
; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v7, off, s[4:7], 0 offset:18
; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v8, off, s[4:7], 0 offset:12
; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v9, off, s[4:7], 0 offset:8
; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v10, off, s[4:7], 0 offset:4
; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v11, off, s[4:7], 0
; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v12, off, s[4:7], 0 offset:28
; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v13, off, s[4:7], 0 offset:24
; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v14, off, s[4:7], 0 offset:20
; GCN-NOHSA-VI-NEXT:    buffer_load_ushort v15, off, s[4:7], 0 offset:16
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s2
; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s3
; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(14)
; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(13)
; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v16, 16, v2
; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(12)
; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v17, 16, v3
; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(11)
; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(10)
; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(9)
; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v18, 16, v6
; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(8)
; GCN-NOHSA-VI-NEXT:    v_lshlrev_b32_e32 v19, 16, v7
; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(7)
; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v3, v8, v0
; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(6)
; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v2, v9, v1
; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(5)
; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v1, v10, v16
; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(4)
; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v0, v11, v17
; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(3)
; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v7, v12, v4
; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(2)
; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v6, v13, v5
; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(1)
; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v5, v14, v18
; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
; GCN-NOHSA-VI-NEXT:    v_or_b32_e32 v4, v15, v19
; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16
; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; GCN-NOHSA-VI-NEXT:    s_endpgm
;
; EG-LABEL: global_load_v16i16_align2:
; EG:       ; %bb.0: ; %entry
; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 1 @6
; EG-NEXT:    ALU 4, @11, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T3.X, 0
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T2.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:    VTX_READ_128 T1.XYZW, T0.X, 16, #1
; EG-NEXT:    VTX_READ_128 T0.XYZW, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 10:
; EG-NEXT:    MOV * T0.X, KC0[2].Y,
; EG-NEXT:    ALU clause starting at 11:
; EG-NEXT:    LSHR T2.X, KC0[2].Z, literal.x,
; EG-NEXT:    ADD_INT * T2.W, KC0[2].Z, literal.y,
; EG-NEXT:    2(2.802597e-45), 16(2.242078e-44)
; EG-NEXT:    LSHR * T3.X, PV.W, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
;
; CM-LABEL: global_load_v16i16_align2:
; CM:       ; %bb.0: ; %entry
; CM-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
; CM-NEXT:    TEX 1 @6
; CM-NEXT:    ALU 5, @11, KC0[CB0:0-32], KC1[]
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T0, T3.X
; CM-NEXT:    MEM_RAT_CACHELESS STORE_DWORD T1, T2.X
; CM-NEXT:    CF_END
; CM-NEXT:    Fetch clause starting at 6:
; CM-NEXT:    VTX_READ_128 T1.XYZW, T0.X, 16, #1
; CM-NEXT:    VTX_READ_128 T0.XYZW, T0.X, 0, #1
; CM-NEXT:    ALU clause starting at 10:
; CM-NEXT:    MOV * T0.X, KC0[2].Y,
; CM-NEXT:    ALU clause starting at 11:
; CM-NEXT:    ADD_INT * T2.W, KC0[2].Z, literal.x,
; CM-NEXT:    16(2.242078e-44), 0(0.000000e+00)
; CM-NEXT:    LSHR * T2.X, PV.W, literal.x,
; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; CM-NEXT:    LSHR * T3.X, KC0[2].Z, literal.x,
; CM-NEXT:    2(2.802597e-45), 0(0.000000e+00)
entry:
  %ld = load <16 x i16>, <16 x i16> addrspace(1)* %in, align 2
  store <16 x i16> %ld, <16 x i16> addrspace(1)* %out, align 32
  ret void
}

define amdgpu_kernel void @global_zextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
; GCN-NOHSA-SI-LABEL: global_zextload_i16_to_i32:
; GCN-NOHSA-SI:       ; %bb.0:
; GCN-NOHSA-SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s6, -1
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s10, s6
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s11, s7
; GCN-NOHSA-SI-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s8, s2
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s9, s3
; GCN-NOHSA-SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s4, s0
; GCN-NOHSA-SI-NEXT:    s_mov_b32 s5, s1
; GCN-NOHSA-SI-NEXT:    s_waitcnt vmcnt(0)
; GCN-NOHSA-SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GCN-NOHSA-SI-NEXT:    s_endpgm
;
; GCN-HSA-LABEL: global_zextload_i16_to_i32:
; GCN-HSA:       ; %bb.0:
; GCN-HSA-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-HSA-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s2
; GCN-HSA-NEXT:    v_mov_b32_e32 v1, s3
; GCN-HSA-NEXT:    flat_load_ushort v2, v[0:1]
; GCN-HSA-NEXT:    v_mov_b32_e32 v0, s0
; GCN-HSA-NEXT:    v_mov_b32_e32
v1, s1 831; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 832; GCN-HSA-NEXT: flat_store_dword v[0:1], v2 833; GCN-HSA-NEXT: s_endpgm 834; 835; GCN-NOHSA-VI-LABEL: global_zextload_i16_to_i32: 836; GCN-NOHSA-VI: ; %bb.0: 837; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 838; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 839; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 840; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 841; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 842; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 843; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 844; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 845; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 846; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 847; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 848; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 849; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[4:7], 0 850; GCN-NOHSA-VI-NEXT: s_endpgm 851; 852; EG-LABEL: global_zextload_i16_to_i32: 853; EG: ; %bb.0: 854; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 855; EG-NEXT: TEX 0 @6 856; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 857; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 858; EG-NEXT: CF_END 859; EG-NEXT: PAD 860; EG-NEXT: Fetch clause starting at 6: 861; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 862; EG-NEXT: ALU clause starting at 8: 863; EG-NEXT: MOV * T0.X, KC0[2].Z, 864; EG-NEXT: ALU clause starting at 9: 865; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 866; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 867; 868; CM-LABEL: global_zextload_i16_to_i32: 869; CM: ; %bb.0: 870; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 871; CM-NEXT: TEX 0 @6 872; CM-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 873; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X 874; CM-NEXT: CF_END 875; CM-NEXT: PAD 876; CM-NEXT: Fetch clause starting at 6: 877; CM-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 878; CM-NEXT: ALU clause starting at 8: 879; CM-NEXT: MOV * T0.X, KC0[2].Z, 880; CM-NEXT: ALU clause starting at 9: 881; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 882; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 883 %a = 
load i16, i16 addrspace(1)* %in 884 %ext = zext i16 %a to i32 885 store i32 %ext, i32 addrspace(1)* %out 886 ret void 887} 888 889define amdgpu_kernel void @global_sextload_i16_to_i32(i32 addrspace(1)* %out, i16 addrspace(1)* %in) #0 { 890; GCN-NOHSA-SI-LABEL: global_sextload_i16_to_i32: 891; GCN-NOHSA-SI: ; %bb.0: 892; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 893; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 894; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 895; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 896; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 897; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 898; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 899; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 900; GCN-NOHSA-SI-NEXT: buffer_load_sshort v0, off, s[8:11], 0 901; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 902; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 903; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 904; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 905; GCN-NOHSA-SI-NEXT: s_endpgm 906; 907; GCN-HSA-LABEL: global_sextload_i16_to_i32: 908; GCN-HSA: ; %bb.0: 909; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 910; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 911; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 912; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 913; GCN-HSA-NEXT: flat_load_sshort v2, v[0:1] 914; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 915; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 916; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 917; GCN-HSA-NEXT: flat_store_dword v[0:1], v2 918; GCN-HSA-NEXT: s_endpgm 919; 920; GCN-NOHSA-VI-LABEL: global_sextload_i16_to_i32: 921; GCN-NOHSA-VI: ; %bb.0: 922; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 923; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 924; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 925; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 926; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 927; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 928; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 929; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 930; GCN-NOHSA-VI-NEXT: buffer_load_sshort v0, off, s[8:11], 0 931; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 932; 
GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 933; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 934; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[4:7], 0 935; GCN-NOHSA-VI-NEXT: s_endpgm 936; 937; EG-LABEL: global_sextload_i16_to_i32: 938; EG: ; %bb.0: 939; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 940; EG-NEXT: TEX 0 @6 941; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] 942; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 943; EG-NEXT: CF_END 944; EG-NEXT: PAD 945; EG-NEXT: Fetch clause starting at 6: 946; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 947; EG-NEXT: ALU clause starting at 8: 948; EG-NEXT: MOV * T0.X, KC0[2].Z, 949; EG-NEXT: ALU clause starting at 9: 950; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x, 951; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 952; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 953; 954; CM-LABEL: global_sextload_i16_to_i32: 955; CM: ; %bb.0: 956; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 957; CM-NEXT: TEX 0 @6 958; CM-NEXT: ALU 3, @9, KC0[CB0:0-32], KC1[] 959; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X 960; CM-NEXT: CF_END 961; CM-NEXT: PAD 962; CM-NEXT: Fetch clause starting at 6: 963; CM-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 964; CM-NEXT: ALU clause starting at 8: 965; CM-NEXT: MOV * T0.X, KC0[2].Z, 966; CM-NEXT: ALU clause starting at 9: 967; CM-NEXT: BFE_INT * T0.X, T0.X, 0.0, literal.x, 968; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 969; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 970; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 971 %a = load i16, i16 addrspace(1)* %in 972 %ext = sext i16 %a to i32 973 store i32 %ext, i32 addrspace(1)* %out 974 ret void 975} 976 977define amdgpu_kernel void @global_zextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 { 978; GCN-NOHSA-SI-LABEL: global_zextload_v1i16_to_v1i32: 979; GCN-NOHSA-SI: ; %bb.0: 980; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 981; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 982; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 983; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, 
s6 984; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 985; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 986; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 987; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 988; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 989; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 990; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 991; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 992; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 993; GCN-NOHSA-SI-NEXT: s_endpgm 994; 995; GCN-HSA-LABEL: global_zextload_v1i16_to_v1i32: 996; GCN-HSA: ; %bb.0: 997; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 998; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 999; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1000; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1001; GCN-HSA-NEXT: flat_load_ushort v2, v[0:1] 1002; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 1003; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 1004; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 1005; GCN-HSA-NEXT: flat_store_dword v[0:1], v2 1006; GCN-HSA-NEXT: s_endpgm 1007; 1008; GCN-NOHSA-VI-LABEL: global_zextload_v1i16_to_v1i32: 1009; GCN-NOHSA-VI: ; %bb.0: 1010; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 1011; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 1012; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 1013; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 1014; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 1015; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1016; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 1017; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 1018; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 1019; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 1020; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 1021; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 1022; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[4:7], 0 1023; GCN-NOHSA-VI-NEXT: s_endpgm 1024; 1025; EG-LABEL: global_zextload_v1i16_to_v1i32: 1026; EG: ; %bb.0: 1027; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1028; EG-NEXT: TEX 0 @6 1029; EG-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 1030; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 1031; EG-NEXT: CF_END 1032; EG-NEXT: PAD 1033; 
EG-NEXT: Fetch clause starting at 6: 1034; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 1035; EG-NEXT: ALU clause starting at 8: 1036; EG-NEXT: MOV * T0.X, KC0[2].Z, 1037; EG-NEXT: ALU clause starting at 9: 1038; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1039; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1040; 1041; CM-LABEL: global_zextload_v1i16_to_v1i32: 1042; CM: ; %bb.0: 1043; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1044; CM-NEXT: TEX 0 @6 1045; CM-NEXT: ALU 1, @9, KC0[CB0:0-32], KC1[] 1046; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X 1047; CM-NEXT: CF_END 1048; CM-NEXT: PAD 1049; CM-NEXT: Fetch clause starting at 6: 1050; CM-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 1051; CM-NEXT: ALU clause starting at 8: 1052; CM-NEXT: MOV * T0.X, KC0[2].Z, 1053; CM-NEXT: ALU clause starting at 9: 1054; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1055; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1056 %load = load <1 x i16>, <1 x i16> addrspace(1)* %in 1057 %ext = zext <1 x i16> %load to <1 x i32> 1058 store <1 x i32> %ext, <1 x i32> addrspace(1)* %out 1059 ret void 1060} 1061 1062define amdgpu_kernel void @global_sextload_v1i16_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 { 1063; GCN-NOHSA-SI-LABEL: global_sextload_v1i16_to_v1i32: 1064; GCN-NOHSA-SI: ; %bb.0: 1065; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1066; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 1067; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 1068; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 1069; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 1070; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1071; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 1072; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 1073; GCN-NOHSA-SI-NEXT: buffer_load_sshort v0, off, s[8:11], 0 1074; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 1075; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 1076; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 1077; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 1078; GCN-NOHSA-SI-NEXT: s_endpgm 1079; 1080; GCN-HSA-LABEL: global_sextload_v1i16_to_v1i32: 
1081; GCN-HSA: ; %bb.0: 1082; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1083; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1084; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1085; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1086; GCN-HSA-NEXT: flat_load_sshort v2, v[0:1] 1087; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 1088; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 1089; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 1090; GCN-HSA-NEXT: flat_store_dword v[0:1], v2 1091; GCN-HSA-NEXT: s_endpgm 1092; 1093; GCN-NOHSA-VI-LABEL: global_sextload_v1i16_to_v1i32: 1094; GCN-NOHSA-VI: ; %bb.0: 1095; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 1096; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 1097; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 1098; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 1099; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 1100; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1101; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 1102; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 1103; GCN-NOHSA-VI-NEXT: buffer_load_sshort v0, off, s[8:11], 0 1104; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 1105; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 1106; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 1107; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[4:7], 0 1108; GCN-NOHSA-VI-NEXT: s_endpgm 1109; 1110; EG-LABEL: global_sextload_v1i16_to_v1i32: 1111; EG: ; %bb.0: 1112; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1113; EG-NEXT: TEX 0 @6 1114; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] 1115; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 1116; EG-NEXT: CF_END 1117; EG-NEXT: PAD 1118; EG-NEXT: Fetch clause starting at 6: 1119; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 1120; EG-NEXT: ALU clause starting at 8: 1121; EG-NEXT: MOV * T0.X, KC0[2].Z, 1122; EG-NEXT: ALU clause starting at 9: 1123; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x, 1124; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 1125; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 1126; 1127; CM-LABEL: global_sextload_v1i16_to_v1i32: 1128; CM: ; %bb.0: 1129; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1130; CM-NEXT: TEX 0 @6 1131; CM-NEXT: 
ALU 3, @9, KC0[CB0:0-32], KC1[] 1132; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X 1133; CM-NEXT: CF_END 1134; CM-NEXT: PAD 1135; CM-NEXT: Fetch clause starting at 6: 1136; CM-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 1137; CM-NEXT: ALU clause starting at 8: 1138; CM-NEXT: MOV * T0.X, KC0[2].Z, 1139; CM-NEXT: ALU clause starting at 9: 1140; CM-NEXT: BFE_INT * T0.X, T0.X, 0.0, literal.x, 1141; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1142; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1143; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1144 %load = load <1 x i16>, <1 x i16> addrspace(1)* %in 1145 %ext = sext <1 x i16> %load to <1 x i32> 1146 store <1 x i32> %ext, <1 x i32> addrspace(1)* %out 1147 ret void 1148} 1149 1150define amdgpu_kernel void @global_zextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { 1151; GCN-NOHSA-SI-LABEL: global_zextload_v2i16_to_v2i32: 1152; GCN-NOHSA-SI: ; %bb.0: 1153; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1154; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 1155; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 1156; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 1157; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 1158; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1159; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 1160; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 1161; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[8:11], 0 1162; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 1163; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 1164; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 1165; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 1166; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v0 1167; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1168; GCN-NOHSA-SI-NEXT: s_endpgm 1169; 1170; GCN-HSA-LABEL: global_zextload_v2i16_to_v2i32: 1171; GCN-HSA: ; %bb.0: 1172; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1173; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1174; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1175; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1176; GCN-HSA-NEXT: flat_load_dword v2, 
v[0:1] 1177; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 1178; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 1179; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 1180; GCN-HSA-NEXT: v_lshrrev_b32_e32 v3, 16, v2 1181; GCN-HSA-NEXT: v_and_b32_e32 v2, 0xffff, v2 1182; GCN-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 1183; GCN-HSA-NEXT: s_endpgm 1184; 1185; GCN-NOHSA-VI-LABEL: global_zextload_v2i16_to_v2i32: 1186; GCN-NOHSA-VI: ; %bb.0: 1187; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 1188; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 1189; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 1190; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 1191; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 1192; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1193; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 1194; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 1195; GCN-NOHSA-VI-NEXT: buffer_load_dword v0, off, s[8:11], 0 1196; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 1197; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 1198; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 1199; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 1200; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v0 1201; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1202; GCN-NOHSA-VI-NEXT: s_endpgm 1203; 1204; EG-LABEL: global_zextload_v2i16_to_v2i32: 1205; EG: ; %bb.0: 1206; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1207; EG-NEXT: TEX 0 @6 1208; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[] 1209; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XY, T5.X, 1 1210; EG-NEXT: CF_END 1211; EG-NEXT: PAD 1212; EG-NEXT: Fetch clause starting at 6: 1213; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 1214; EG-NEXT: ALU clause starting at 8: 1215; EG-NEXT: MOV * T4.X, KC0[2].Z, 1216; EG-NEXT: ALU clause starting at 9: 1217; EG-NEXT: LSHR * T4.Y, T4.X, literal.x, 1218; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1219; EG-NEXT: AND_INT T4.X, T4.X, literal.x, 1220; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y, 1221; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 1222; 1223; CM-LABEL: global_zextload_v2i16_to_v2i32: 1224; CM: ; %bb.0: 1225; CM-NEXT: ALU 
0, @8, KC0[CB0:0-32], KC1[] 1226; CM-NEXT: TEX 0 @6 1227; CM-NEXT: ALU 5, @9, KC0[CB0:0-32], KC1[] 1228; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T4, T5.X 1229; CM-NEXT: CF_END 1230; CM-NEXT: PAD 1231; CM-NEXT: Fetch clause starting at 6: 1232; CM-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 1233; CM-NEXT: ALU clause starting at 8: 1234; CM-NEXT: MOV * T4.X, KC0[2].Z, 1235; CM-NEXT: ALU clause starting at 9: 1236; CM-NEXT: LSHR * T4.Y, T4.X, literal.x, 1237; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1238; CM-NEXT: AND_INT * T4.X, T4.X, literal.x, 1239; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 1240; CM-NEXT: LSHR * T5.X, KC0[2].Y, literal.x, 1241; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1242 %load = load <2 x i16>, <2 x i16> addrspace(1)* %in 1243 %ext = zext <2 x i16> %load to <2 x i32> 1244 store <2 x i32> %ext, <2 x i32> addrspace(1)* %out 1245 ret void 1246} 1247 1248; TODO: The r600 targets (EG/CM) should use ASHR for the high element instead of LSHR + BFE_INT (the GCN targets already emit v_ashrrev_i32) 1249define amdgpu_kernel void @global_sextload_v2i16_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { 1250; GCN-NOHSA-SI-LABEL: global_sextload_v2i16_to_v2i32: 1251; GCN-NOHSA-SI: ; %bb.0: 1252; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1253; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 1254; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 1255; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 1256; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 1257; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1258; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 1259; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 1260; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[8:11], 0 1261; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 1262; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 1263; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 1264; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 16, v0 1265; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v0, 0, 16 1266; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1267; GCN-NOHSA-SI-NEXT: s_endpgm 1268; 1269; GCN-HSA-LABEL: global_sextload_v2i16_to_v2i32: 1270; GCN-HSA: ; %bb.0: 1271; 
GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1272; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1273; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1274; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1275; GCN-HSA-NEXT: flat_load_dword v2, v[0:1] 1276; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 1277; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 1278; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 1279; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 16, v2 1280; GCN-HSA-NEXT: v_bfe_i32 v2, v2, 0, 16 1281; GCN-HSA-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 1282; GCN-HSA-NEXT: s_endpgm 1283; 1284; GCN-NOHSA-VI-LABEL: global_sextload_v2i16_to_v2i32: 1285; GCN-NOHSA-VI: ; %bb.0: 1286; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 1287; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 1288; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 1289; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 1290; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 1291; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1292; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 1293; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 1294; GCN-NOHSA-VI-NEXT: buffer_load_dword v0, off, s[8:11], 0 1295; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 1296; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 1297; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 1298; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 16, v0 1299; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v0, 0, 16 1300; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1301; GCN-NOHSA-VI-NEXT: s_endpgm 1302; 1303; EG-LABEL: global_sextload_v2i16_to_v2i32: 1304; EG: ; %bb.0: 1305; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1306; EG-NEXT: TEX 0 @6 1307; EG-NEXT: ALU 5, @9, KC0[CB0:0-32], KC1[] 1308; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XY, T4.X, 1 1309; EG-NEXT: CF_END 1310; EG-NEXT: PAD 1311; EG-NEXT: Fetch clause starting at 6: 1312; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 1313; EG-NEXT: ALU clause starting at 8: 1314; EG-NEXT: MOV * T4.X, KC0[2].Z, 1315; EG-NEXT: ALU clause starting at 9: 1316; EG-NEXT: BFE_INT T5.X, T4.X, 0.0, literal.x, 1317; EG-NEXT: LSHR T0.W, T4.X, literal.x, 1318; EG-NEXT: LSHR * T4.X, KC0[2].Y, 
literal.y, 1319; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 1320; EG-NEXT: BFE_INT * T5.Y, PV.W, 0.0, literal.x, 1321; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1322; 1323; CM-LABEL: global_sextload_v2i16_to_v2i32: 1324; CM: ; %bb.0: 1325; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1326; CM-NEXT: TEX 0 @6 1327; CM-NEXT: ALU 5, @9, KC0[CB0:0-32], KC1[] 1328; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T5, T4.X 1329; CM-NEXT: CF_END 1330; CM-NEXT: PAD 1331; CM-NEXT: Fetch clause starting at 6: 1332; CM-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 1333; CM-NEXT: ALU clause starting at 8: 1334; CM-NEXT: MOV * T4.X, KC0[2].Z, 1335; CM-NEXT: ALU clause starting at 9: 1336; CM-NEXT: BFE_INT T5.X, T4.X, 0.0, literal.x, 1337; CM-NEXT: LSHR * T0.W, T4.X, literal.x, 1338; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1339; CM-NEXT: LSHR T4.X, KC0[2].Y, literal.x, 1340; CM-NEXT: BFE_INT * T5.Y, PV.W, 0.0, literal.y, 1341; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 1342 %load = load <2 x i16>, <2 x i16> addrspace(1)* %in 1343 %ext = sext <2 x i16> %load to <2 x i32> 1344 store <2 x i32> %ext, <2 x i32> addrspace(1)* %out 1345 ret void 1346} 1347 1348define amdgpu_kernel void @global_zextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) { 1349; GCN-NOHSA-SI-LABEL: global_zextload_v3i16_to_v3i32: 1350; GCN-NOHSA-SI: ; %bb.0: ; %entry 1351; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1352; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 1353; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 1354; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 1355; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 1356; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1357; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 1358; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 1359; GCN-NOHSA-SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 1360; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 1361; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 1362; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 1363; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v3, 16, v0 1364; GCN-NOHSA-SI-NEXT: 
v_and_b32_e32 v2, 0xffff, v0 1365; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v1 1366; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:8 1367; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[2:3], off, s[4:7], 0 1368; GCN-NOHSA-SI-NEXT: s_endpgm 1369; 1370; GCN-HSA-LABEL: global_zextload_v3i16_to_v3i32: 1371; GCN-HSA: ; %bb.0: ; %entry 1372; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1373; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1374; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1375; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1376; GCN-HSA-NEXT: flat_load_dwordx2 v[3:4], v[0:1] 1377; GCN-HSA-NEXT: v_mov_b32_e32 v5, s0 1378; GCN-HSA-NEXT: v_mov_b32_e32 v6, s1 1379; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 1380; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v3 1381; GCN-HSA-NEXT: v_and_b32_e32 v2, 0xffff, v4 1382; GCN-HSA-NEXT: v_and_b32_e32 v0, 0xffff, v3 1383; GCN-HSA-NEXT: flat_store_dwordx3 v[5:6], v[0:2] 1384; GCN-HSA-NEXT: s_endpgm 1385; 1386; GCN-NOHSA-VI-LABEL: global_zextload_v3i16_to_v3i32: 1387; GCN-NOHSA-VI: ; %bb.0: ; %entry 1388; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 1389; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 1390; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 1391; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 1392; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 1393; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1394; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 1395; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 1396; GCN-NOHSA-VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 1397; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 1398; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 1399; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 1400; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v2, 0xffff, v1 1401; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 1402; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v0 1403; GCN-NOHSA-VI-NEXT: buffer_store_dwordx3 v[0:2], off, s[4:7], 0 1404; GCN-NOHSA-VI-NEXT: s_endpgm 1405; 1406; EG-LABEL: global_zextload_v3i16_to_v3i32: 1407; EG: ; %bb.0: ; %entry 1408; EG-NEXT: ALU 4, @12, KC0[CB0:0-32], KC1[] 1409; 
EG-NEXT: TEX 2 @6 1410; EG-NEXT: ALU 2, @17, KC0[], KC1[] 1411; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T4.X, 0 1412; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T3.XY, T0.X, 1 1413; EG-NEXT: CF_END 1414; EG-NEXT: Fetch clause starting at 6: 1415; EG-NEXT: VTX_READ_16 T2.X, T1.X, 4, #1 1416; EG-NEXT: VTX_READ_16 T3.X, T1.X, 0, #1 1417; EG-NEXT: VTX_READ_16 T1.X, T1.X, 2, #1 1418; EG-NEXT: ALU clause starting at 12: 1419; EG-NEXT: LSHR T0.X, KC0[2].Y, literal.x, 1420; EG-NEXT: MOV * T1.X, KC0[2].Z, 1421; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1422; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 1423; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00) 1424; EG-NEXT: ALU clause starting at 17: 1425; EG-NEXT: LSHR T4.X, T0.W, literal.x, 1426; EG-NEXT: MOV * T3.Y, T1.X, 1427; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1428; 1429; CM-LABEL: global_zextload_v3i16_to_v3i32: 1430; CM: ; %bb.0: ; %entry 1431; CM-NEXT: ALU 4, @12, KC0[CB0:0-32], KC1[] 1432; CM-NEXT: TEX 2 @6 1433; CM-NEXT: ALU 2, @17, KC0[CB0:0-32], KC1[] 1434; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T3, T4.X 1435; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T2.X, T0.X 1436; CM-NEXT: CF_END 1437; CM-NEXT: Fetch clause starting at 6: 1438; CM-NEXT: VTX_READ_16 T2.X, T1.X, 4, #1 1439; CM-NEXT: VTX_READ_16 T3.X, T1.X, 0, #1 1440; CM-NEXT: VTX_READ_16 T1.X, T1.X, 2, #1 1441; CM-NEXT: ALU clause starting at 12: 1442; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 1443; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00) 1444; CM-NEXT: LSHR * T0.X, PV.W, literal.x, 1445; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1446; CM-NEXT: MOV * T1.X, KC0[2].Z, 1447; CM-NEXT: ALU clause starting at 17: 1448; CM-NEXT: LSHR T4.X, KC0[2].Y, literal.x, 1449; CM-NEXT: MOV * T3.Y, T1.X, 1450; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1451entry: 1452 %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in 1453 %ext = zext <3 x i16> %ld to <3 x i32> 1454 store <3 x i32> %ext, <3 x i32> addrspace(1)* %out 1455 ret void 1456} 1457 1458define amdgpu_kernel void 
@global_sextload_v3i16_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i16> addrspace(1)* %in) { 1459; GCN-NOHSA-SI-LABEL: global_sextload_v3i16_to_v3i32: 1460; GCN-NOHSA-SI: ; %bb.0: ; %entry 1461; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1462; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 1463; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 1464; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 1465; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 1466; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1467; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 1468; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 1469; GCN-NOHSA-SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 1470; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 1471; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 1472; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 1473; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v3, 16, v0 1474; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v0, 0, 16 1475; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v1, 0, 16 1476; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:8 1477; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[2:3], off, s[4:7], 0 1478; GCN-NOHSA-SI-NEXT: s_endpgm 1479; 1480; GCN-HSA-LABEL: global_sextload_v3i16_to_v3i32: 1481; GCN-HSA: ; %bb.0: ; %entry 1482; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1483; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1484; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1485; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1486; GCN-HSA-NEXT: flat_load_dwordx2 v[3:4], v[0:1] 1487; GCN-HSA-NEXT: v_mov_b32_e32 v5, s0 1488; GCN-HSA-NEXT: v_mov_b32_e32 v6, s1 1489; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 1490; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v3 1491; GCN-HSA-NEXT: v_bfe_i32 v2, v4, 0, 16 1492; GCN-HSA-NEXT: v_bfe_i32 v0, v3, 0, 16 1493; GCN-HSA-NEXT: flat_store_dwordx3 v[5:6], v[0:2] 1494; GCN-HSA-NEXT: s_endpgm 1495; 1496; GCN-NOHSA-VI-LABEL: global_sextload_v3i16_to_v3i32: 1497; GCN-NOHSA-VI: ; %bb.0: ; %entry 1498; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 1499; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 1500; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 1501; 
GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 1502; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 1503; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1504; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 1505; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 1506; GCN-NOHSA-VI-NEXT: buffer_load_dwordx2 v[3:4], off, s[8:11], 0 1507; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 1508; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 1509; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 1510; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 16, v3 1511; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v4, 0, 16 1512; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v3, 0, 16 1513; GCN-NOHSA-VI-NEXT: buffer_store_dwordx3 v[0:2], off, s[4:7], 0 1514; GCN-NOHSA-VI-NEXT: s_endpgm 1515; 1516; EG-LABEL: global_sextload_v3i16_to_v3i32: 1517; EG: ; %bb.0: ; %entry 1518; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 1519; EG-NEXT: TEX 2 @6 1520; EG-NEXT: ALU 9, @13, KC0[CB0:0-32], KC1[] 1521; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0 1522; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1523; EG-NEXT: CF_END 1524; EG-NEXT: Fetch clause starting at 6: 1525; EG-NEXT: VTX_READ_16 T1.X, T0.X, 2, #1 1526; EG-NEXT: VTX_READ_16 T2.X, T0.X, 4, #1 1527; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 1528; EG-NEXT: ALU clause starting at 12: 1529; EG-NEXT: MOV * T0.X, KC0[2].Z, 1530; EG-NEXT: ALU clause starting at 13: 1531; EG-NEXT: BFE_INT * T0.Y, T1.X, 0.0, literal.x, 1532; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1533; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x, 1534; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 1535; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 1536; EG-NEXT: BFE_INT T2.X, T2.X, 0.0, literal.x, 1537; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 1538; EG-NEXT: 16(2.242078e-44), 8(1.121039e-44) 1539; EG-NEXT: LSHR * T3.X, PV.W, literal.x, 1540; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1541; 1542; CM-LABEL: global_sextload_v3i16_to_v3i32: 1543; CM: ; %bb.0: ; %entry 1544; CM-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 1545; CM-NEXT: TEX 2 @6 1546; CM-NEXT: ALU 9, @13, KC0[CB0:0-32], KC1[] 
1547; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T2.X 1548; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T3.X 1549; CM-NEXT: CF_END 1550; CM-NEXT: Fetch clause starting at 6: 1551; CM-NEXT: VTX_READ_16 T1.X, T0.X, 4, #1 1552; CM-NEXT: VTX_READ_16 T2.X, T0.X, 0, #1 1553; CM-NEXT: VTX_READ_16 T0.X, T0.X, 2, #1 1554; CM-NEXT: ALU clause starting at 12: 1555; CM-NEXT: MOV * T0.X, KC0[2].Z, 1556; CM-NEXT: ALU clause starting at 13: 1557; CM-NEXT: BFE_INT T1.X, T1.X, 0.0, literal.x, 1558; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 1559; CM-NEXT: 16(2.242078e-44), 8(1.121039e-44) 1560; CM-NEXT: LSHR T3.X, PV.W, literal.x, 1561; CM-NEXT: BFE_INT * T0.Y, T0.X, 0.0, literal.y, 1562; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 1563; CM-NEXT: BFE_INT * T0.X, T2.X, 0.0, literal.x, 1564; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1565; CM-NEXT: LSHR * T2.X, KC0[2].Y, literal.x, 1566; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1567entry: 1568 %ld = load <3 x i16>, <3 x i16> addrspace(1)* %in 1569 %ext = sext <3 x i16> %ld to <3 x i32> 1570 store <3 x i32> %ext, <3 x i32> addrspace(1)* %out 1571 ret void 1572} 1573 1574; TODO: This should use DST, but for some reason there are redundant MOVs 1575define amdgpu_kernel void @global_zextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 { 1576; GCN-NOHSA-SI-LABEL: global_zextload_v4i16_to_v4i32: 1577; GCN-NOHSA-SI: ; %bb.0: 1578; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1579; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 1580; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 1581; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 1582; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 1583; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1584; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 1585; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 1586; GCN-NOHSA-SI-NEXT: buffer_load_dwordx2 v[4:5], off, s[8:11], 0 1587; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 1588; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 1589; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 1590; GCN-NOHSA-SI-NEXT: 
v_lshrrev_b32_e32 v3, 16, v5 1591; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v1, 16, v4 1592; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v2, 0xffff, v5 1593; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v4 1594; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 1595; GCN-NOHSA-SI-NEXT: s_endpgm 1596; 1597; GCN-HSA-LABEL: global_zextload_v4i16_to_v4i32: 1598; GCN-HSA: ; %bb.0: 1599; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1600; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1601; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1602; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1603; GCN-HSA-NEXT: flat_load_dwordx2 v[4:5], v[0:1] 1604; GCN-HSA-NEXT: v_mov_b32_e32 v6, s0 1605; GCN-HSA-NEXT: v_mov_b32_e32 v7, s1 1606; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 1607; GCN-HSA-NEXT: v_lshrrev_b32_e32 v3, 16, v5 1608; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v4 1609; GCN-HSA-NEXT: v_and_b32_e32 v2, 0xffff, v5 1610; GCN-HSA-NEXT: v_and_b32_e32 v0, 0xffff, v4 1611; GCN-HSA-NEXT: flat_store_dwordx4 v[6:7], v[0:3] 1612; GCN-HSA-NEXT: s_endpgm 1613; 1614; GCN-NOHSA-VI-LABEL: global_zextload_v4i16_to_v4i32: 1615; GCN-NOHSA-VI: ; %bb.0: 1616; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 1617; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 1618; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 1619; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 1620; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 1621; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1622; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 1623; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 1624; GCN-NOHSA-VI-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 1625; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 1626; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 1627; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 1628; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v1 1629; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v2, 0xffff, v1 1630; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 1631; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v0 1632; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 1633; GCN-NOHSA-VI-NEXT: s_endpgm 1634; 1635; 
EG-LABEL: global_zextload_v4i16_to_v4i32: 1636; EG: ; %bb.0: 1637; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1638; EG-NEXT: TEX 0 @6 1639; EG-NEXT: ALU 12, @9, KC0[CB0:0-32], KC1[] 1640; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1 1641; EG-NEXT: CF_END 1642; EG-NEXT: PAD 1643; EG-NEXT: Fetch clause starting at 6: 1644; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 1645; EG-NEXT: ALU clause starting at 8: 1646; EG-NEXT: MOV * T5.X, KC0[2].Z, 1647; EG-NEXT: ALU clause starting at 9: 1648; EG-NEXT: MOV T2.X, T5.X, 1649; EG-NEXT: MOV * T3.X, T5.Y, 1650; EG-NEXT: MOV T0.Y, PV.X, 1651; EG-NEXT: MOV * T0.Z, PS, 1652; EG-NEXT: LSHR * T5.W, PV.Z, literal.x, 1653; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1654; EG-NEXT: AND_INT * T5.Z, T0.Z, literal.x, 1655; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 1656; EG-NEXT: LSHR * T5.Y, T0.Y, literal.x, 1657; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1658; EG-NEXT: AND_INT T5.X, T0.Y, literal.x, 1659; EG-NEXT: LSHR * T6.X, KC0[2].Y, literal.y, 1660; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 1661; 1662; CM-LABEL: global_zextload_v4i16_to_v4i32: 1663; CM: ; %bb.0: 1664; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1665; CM-NEXT: TEX 0 @6 1666; CM-NEXT: ALU 13, @9, KC0[CB0:0-32], KC1[] 1667; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T5, T6.X 1668; CM-NEXT: CF_END 1669; CM-NEXT: PAD 1670; CM-NEXT: Fetch clause starting at 6: 1671; CM-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 1672; CM-NEXT: ALU clause starting at 8: 1673; CM-NEXT: MOV * T5.X, KC0[2].Z, 1674; CM-NEXT: ALU clause starting at 9: 1675; CM-NEXT: MOV * T2.X, T5.X, 1676; CM-NEXT: MOV T3.X, T5.Y, 1677; CM-NEXT: MOV * T0.Y, PV.X, 1678; CM-NEXT: MOV * T0.Z, PV.X, 1679; CM-NEXT: LSHR * T5.W, PV.Z, literal.x, 1680; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1681; CM-NEXT: AND_INT * T5.Z, T0.Z, literal.x, 1682; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 1683; CM-NEXT: LSHR * T5.Y, T0.Y, literal.x, 1684; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1685; CM-NEXT: AND_INT * T5.X, T0.Y, 
literal.x, 1686; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 1687; CM-NEXT: LSHR * T6.X, KC0[2].Y, literal.x, 1688; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1689 %load = load <4 x i16>, <4 x i16> addrspace(1)* %in 1690 %ext = zext <4 x i16> %load to <4 x i32> 1691 store <4 x i32> %ext, <4 x i32> addrspace(1)* %out 1692 ret void 1693} 1694 1695; TODO: We should use ASHR instead of LSHR + BFE 1696; TODO: This should use DST, but for some reason there are redundant MOVs 1697define amdgpu_kernel void @global_sextload_v4i16_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 { 1698; GCN-NOHSA-SI-LABEL: global_sextload_v4i16_to_v4i32: 1699; GCN-NOHSA-SI: ; %bb.0: 1700; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1701; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 1702; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 1703; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 1704; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 1705; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1706; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 1707; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 1708; GCN-NOHSA-SI-NEXT: buffer_load_dwordx2 v[3:4], off, s[8:11], 0 1709; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 1710; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 1711; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 1712; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 16, v3 1713; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[5:6], v[3:4], 48 1714; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v4, 0, 16 1715; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v3, 0, 16 1716; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v5 1717; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 1718; GCN-NOHSA-SI-NEXT: s_endpgm 1719; 1720; GCN-HSA-LABEL: global_sextload_v4i16_to_v4i32: 1721; GCN-HSA: ; %bb.0: 1722; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1723; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1724; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1725; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1726; GCN-HSA-NEXT: flat_load_dwordx2 v[3:4], v[0:1] 1727; GCN-HSA-NEXT: v_mov_b32_e32 v5, s0 1728; GCN-HSA-NEXT: v_mov_b32_e32 v6, s1 1729; 
GCN-HSA-NEXT: s_waitcnt vmcnt(0) 1730; GCN-HSA-NEXT: v_ashr_i64 v[7:8], v[3:4], 48 1731; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v3 1732; GCN-HSA-NEXT: v_bfe_i32 v2, v4, 0, 16 1733; GCN-HSA-NEXT: v_bfe_i32 v0, v3, 0, 16 1734; GCN-HSA-NEXT: v_mov_b32_e32 v3, v7 1735; GCN-HSA-NEXT: flat_store_dwordx4 v[5:6], v[0:3] 1736; GCN-HSA-NEXT: s_endpgm 1737; 1738; GCN-NOHSA-VI-LABEL: global_sextload_v4i16_to_v4i32: 1739; GCN-NOHSA-VI: ; %bb.0: 1740; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 1741; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 1742; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 1743; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 1744; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 1745; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1746; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 1747; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 1748; GCN-NOHSA-VI-NEXT: buffer_load_dwordx2 v[4:5], off, s[8:11], 0 1749; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 1750; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 1751; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 1752; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 16, v5 1753; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 16, v4 1754; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v5, 0, 16 1755; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v4, 0, 16 1756; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 1757; GCN-NOHSA-VI-NEXT: s_endpgm 1758; 1759; EG-LABEL: global_sextload_v4i16_to_v4i32: 1760; EG: ; %bb.0: 1761; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1762; EG-NEXT: TEX 0 @6 1763; EG-NEXT: ALU 14, @9, KC0[CB0:0-32], KC1[] 1764; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1 1765; EG-NEXT: CF_END 1766; EG-NEXT: PAD 1767; EG-NEXT: Fetch clause starting at 6: 1768; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 1769; EG-NEXT: ALU clause starting at 8: 1770; EG-NEXT: MOV * T5.X, KC0[2].Z, 1771; EG-NEXT: ALU clause starting at 9: 1772; EG-NEXT: MOV T2.X, T5.X, 1773; EG-NEXT: MOV * T3.X, T5.Y, 1774; EG-NEXT: MOV T0.Y, PV.X, 1775; EG-NEXT: MOV * T0.Z, PS, 1776; EG-NEXT: BFE_INT * T5.Z, PV.Z, 0.0, literal.x, 1777; 
EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1778; EG-NEXT: BFE_INT T5.X, T0.Y, 0.0, literal.x, 1779; EG-NEXT: LSHR * T0.W, T0.Z, literal.x, 1780; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1781; EG-NEXT: BFE_INT T5.W, PV.W, 0.0, literal.x, 1782; EG-NEXT: LSHR * T0.W, T0.Y, literal.x, 1783; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1784; EG-NEXT: LSHR T6.X, KC0[2].Y, literal.x, 1785; EG-NEXT: BFE_INT * T5.Y, PS, 0.0, literal.y, 1786; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 1787; 1788; CM-LABEL: global_sextload_v4i16_to_v4i32: 1789; CM: ; %bb.0: 1790; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1791; CM-NEXT: TEX 0 @6 1792; CM-NEXT: ALU 14, @9, KC0[CB0:0-32], KC1[] 1793; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T5, T6.X 1794; CM-NEXT: CF_END 1795; CM-NEXT: PAD 1796; CM-NEXT: Fetch clause starting at 6: 1797; CM-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 1798; CM-NEXT: ALU clause starting at 8: 1799; CM-NEXT: MOV * T5.X, KC0[2].Z, 1800; CM-NEXT: ALU clause starting at 9: 1801; CM-NEXT: MOV * T2.X, T5.X, 1802; CM-NEXT: MOV T3.X, T5.Y, 1803; CM-NEXT: MOV * T0.Y, PV.X, 1804; CM-NEXT: MOV * T0.Z, PV.X, 1805; CM-NEXT: BFE_INT * T5.Z, PV.Z, 0.0, literal.x, 1806; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1807; CM-NEXT: BFE_INT T5.X, T0.Y, 0.0, literal.x, 1808; CM-NEXT: LSHR * T0.W, T0.Z, literal.x, 1809; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1810; CM-NEXT: LSHR T0.Z, T0.Y, literal.x, 1811; CM-NEXT: BFE_INT * T5.W, PV.W, 0.0, literal.x, 1812; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1813; CM-NEXT: LSHR T6.X, KC0[2].Y, literal.x, 1814; CM-NEXT: BFE_INT * T5.Y, PV.Z, 0.0, literal.y, 1815; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 1816 %load = load <4 x i16>, <4 x i16> addrspace(1)* %in 1817 %ext = sext <4 x i16> %load to <4 x i32> 1818 store <4 x i32> %ext, <4 x i32> addrspace(1)* %out 1819 ret void 1820} 1821 1822; TODO: These should use LSHR instead of BFE_UINT 1823define amdgpu_kernel void @global_zextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* 
%in) #0 { 1824; GCN-NOHSA-SI-LABEL: global_zextload_v8i16_to_v8i32: 1825; GCN-NOHSA-SI: ; %bb.0: 1826; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1827; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 1828; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 1829; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 1830; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 1831; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1832; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 1833; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 1834; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 1835; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 1836; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 1837; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 1838; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v7, 16, v1 1839; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v5, 16, v0 1840; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v11, 16, v3 1841; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v9, 16, v2 1842; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v6, 0xffff, v1 1843; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v4, 0xffff, v0 1844; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v10, 0xffff, v3 1845; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v8, 0xffff, v2 1846; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 offset:16 1847; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 1848; GCN-NOHSA-SI-NEXT: s_endpgm 1849; 1850; GCN-HSA-LABEL: global_zextload_v8i16_to_v8i32: 1851; GCN-HSA: ; %bb.0: 1852; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 1853; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 1854; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 1855; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 1856; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 1857; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 1858; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 1859; GCN-HSA-NEXT: v_mov_b32_e32 v15, s3 1860; GCN-HSA-NEXT: v_mov_b32_e32 v13, s1 1861; GCN-HSA-NEXT: v_mov_b32_e32 v14, s2 1862; GCN-HSA-NEXT: v_mov_b32_e32 v12, s0 1863; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 1864; GCN-HSA-NEXT: v_lshrrev_b32_e32 v11, 16, v3 1865; GCN-HSA-NEXT: v_lshrrev_b32_e32 v9, 16, v2 1866; GCN-HSA-NEXT: 
v_and_b32_e32 v10, 0xffff, v3 1867; GCN-HSA-NEXT: v_and_b32_e32 v8, 0xffff, v2 1868; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v1 1869; GCN-HSA-NEXT: v_lshrrev_b32_e32 v5, 16, v0 1870; GCN-HSA-NEXT: v_and_b32_e32 v6, 0xffff, v1 1871; GCN-HSA-NEXT: v_and_b32_e32 v4, 0xffff, v0 1872; GCN-HSA-NEXT: flat_store_dwordx4 v[14:15], v[8:11] 1873; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[4:7] 1874; GCN-HSA-NEXT: s_endpgm 1875; 1876; GCN-NOHSA-VI-LABEL: global_zextload_v8i16_to_v8i32: 1877; GCN-NOHSA-VI: ; %bb.0: 1878; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 1879; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 1880; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 1881; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 1882; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 1883; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 1884; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 1885; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 1886; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 1887; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 1888; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 1889; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 1890; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v11, 16, v3 1891; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v10, 0xffff, v3 1892; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v9, 16, v2 1893; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v8, 0xffff, v2 1894; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v7, 16, v1 1895; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v6, 0xffff, v1 1896; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v5, 16, v0 1897; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v4, 0xffff, v0 1898; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 offset:16 1899; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 1900; GCN-NOHSA-VI-NEXT: s_endpgm 1901; 1902; EG-LABEL: global_zextload_v8i16_to_v8i32: 1903; EG: ; %bb.0: 1904; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1905; EG-NEXT: TEX 0 @6 1906; EG-NEXT: ALU 17, @9, KC0[CB0:0-32], KC1[] 1907; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0 1908; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 
1 1909; EG-NEXT: CF_END 1910; EG-NEXT: Fetch clause starting at 6: 1911; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 1912; EG-NEXT: ALU clause starting at 8: 1913; EG-NEXT: MOV * T7.X, KC0[2].Z, 1914; EG-NEXT: ALU clause starting at 9: 1915; EG-NEXT: LSHR * T8.W, T7.Y, literal.x, 1916; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1917; EG-NEXT: AND_INT * T8.Z, T7.Y, literal.x, 1918; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 1919; EG-NEXT: LSHR T8.Y, T7.X, literal.x, 1920; EG-NEXT: LSHR * T9.W, T7.W, literal.x, 1921; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1922; EG-NEXT: AND_INT T8.X, T7.X, literal.x, 1923; EG-NEXT: AND_INT T9.Z, T7.W, literal.x, 1924; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.y, 1925; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 1926; EG-NEXT: LSHR * T9.Y, T7.Z, literal.x, 1927; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1928; EG-NEXT: AND_INT T9.X, T7.Z, literal.x, 1929; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 1930; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 1931; EG-NEXT: LSHR * T10.X, PV.W, literal.x, 1932; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1933; 1934; CM-LABEL: global_zextload_v8i16_to_v8i32: 1935; CM: ; %bb.0: 1936; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1937; CM-NEXT: TEX 0 @6 1938; CM-NEXT: ALU 17, @9, KC0[CB0:0-32], KC1[] 1939; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T7, T10.X 1940; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T8, T9.X 1941; CM-NEXT: CF_END 1942; CM-NEXT: Fetch clause starting at 6: 1943; CM-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 1944; CM-NEXT: ALU clause starting at 8: 1945; CM-NEXT: MOV * T7.X, KC0[2].Z, 1946; CM-NEXT: ALU clause starting at 9: 1947; CM-NEXT: LSHR * T8.W, T7.W, literal.x, 1948; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1949; CM-NEXT: AND_INT * T8.Z, T7.W, literal.x, 1950; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 1951; CM-NEXT: LSHR T8.Y, T7.Z, literal.x, 1952; CM-NEXT: LSHR * T7.W, T7.Y, literal.x, 1953; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1954; CM-NEXT: AND_INT T8.X, T7.Z, 
literal.x, 1955; CM-NEXT: AND_INT T7.Z, T7.Y, literal.x, 1956; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 1957; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 1958; CM-NEXT: LSHR T9.X, PV.W, literal.x, 1959; CM-NEXT: LSHR * T7.Y, T7.X, literal.y, 1960; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 1961; CM-NEXT: AND_INT * T7.X, T7.X, literal.x, 1962; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 1963; CM-NEXT: LSHR * T10.X, KC0[2].Y, literal.x, 1964; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1965 %load = load <8 x i16>, <8 x i16> addrspace(1)* %in 1966 %ext = zext <8 x i16> %load to <8 x i32> 1967 store <8 x i32> %ext, <8 x i32> addrspace(1)* %out 1968 ret void 1969} 1970 1971; TODO: These should use ASHR instead of LSHR + BFE_INT 1972define amdgpu_kernel void @global_sextload_v8i16_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 { 1973; GCN-NOHSA-SI-LABEL: global_sextload_v8i16_to_v8i32: 1974; GCN-NOHSA-SI: ; %bb.0: 1975; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1976; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 1977; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 1978; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 1979; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 1980; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 1981; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 1982; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 1983; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 1984; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 1985; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 1986; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 1987; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v7, 16, v1 1988; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v5, 16, v0 1989; GCN-NOHSA-SI-NEXT: v_bfe_i32 v6, v1, 0, 16 1990; GCN-NOHSA-SI-NEXT: v_bfe_i32 v4, v0, 0, 16 1991; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v11, 16, v3 1992; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v9, 16, v2 1993; GCN-NOHSA-SI-NEXT: v_bfe_i32 v10, v3, 0, 16 1994; GCN-NOHSA-SI-NEXT: v_bfe_i32 v8, v2, 0, 16 1995; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 
offset:16 1996; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 1997; GCN-NOHSA-SI-NEXT: s_endpgm 1998; 1999; GCN-HSA-LABEL: global_sextload_v8i16_to_v8i32: 2000; GCN-HSA: ; %bb.0: 2001; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2002; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 2003; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 2004; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 2005; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 2006; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 2007; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2008; GCN-HSA-NEXT: v_mov_b32_e32 v15, s3 2009; GCN-HSA-NEXT: v_mov_b32_e32 v13, s1 2010; GCN-HSA-NEXT: v_mov_b32_e32 v14, s2 2011; GCN-HSA-NEXT: v_mov_b32_e32 v12, s0 2012; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 2013; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 16, v3 2014; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 16, v2 2015; GCN-HSA-NEXT: v_bfe_i32 v10, v3, 0, 16 2016; GCN-HSA-NEXT: v_bfe_i32 v8, v2, 0, 16 2017; GCN-HSA-NEXT: v_ashrrev_i32_e32 v7, 16, v1 2018; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 16, v0 2019; GCN-HSA-NEXT: v_bfe_i32 v6, v1, 0, 16 2020; GCN-HSA-NEXT: v_bfe_i32 v4, v0, 0, 16 2021; GCN-HSA-NEXT: flat_store_dwordx4 v[14:15], v[8:11] 2022; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[4:7] 2023; GCN-HSA-NEXT: s_endpgm 2024; 2025; GCN-NOHSA-VI-LABEL: global_sextload_v8i16_to_v8i32: 2026; GCN-NOHSA-VI: ; %bb.0: 2027; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 2028; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 2029; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 2030; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 2031; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 2032; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 2033; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 2034; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 2035; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 2036; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 2037; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 2038; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 2039; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v11, 16, v3 2040; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v9, 16, v2 
2041; GCN-NOHSA-VI-NEXT: v_bfe_i32 v10, v3, 0, 16 2042; GCN-NOHSA-VI-NEXT: v_bfe_i32 v8, v2, 0, 16 2043; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 16, v1 2044; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v5, 16, v0 2045; GCN-NOHSA-VI-NEXT: v_bfe_i32 v6, v1, 0, 16 2046; GCN-NOHSA-VI-NEXT: v_bfe_i32 v4, v0, 0, 16 2047; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 offset:16 2048; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 2049; GCN-NOHSA-VI-NEXT: s_endpgm 2050; 2051; EG-LABEL: global_sextload_v8i16_to_v8i32: 2052; EG: ; %bb.0: 2053; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 2054; EG-NEXT: TEX 0 @6 2055; EG-NEXT: ALU 19, @9, KC0[CB0:0-32], KC1[] 2056; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T10.X, 0 2057; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T7.X, 1 2058; EG-NEXT: CF_END 2059; EG-NEXT: Fetch clause starting at 6: 2060; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 2061; EG-NEXT: ALU clause starting at 8: 2062; EG-NEXT: MOV * T7.X, KC0[2].Z, 2063; EG-NEXT: ALU clause starting at 9: 2064; EG-NEXT: BFE_INT * T8.Z, T7.Y, 0.0, literal.x, 2065; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2066; EG-NEXT: BFE_INT T8.X, T7.X, 0.0, literal.x, 2067; EG-NEXT: BFE_INT T9.Z, T7.W, 0.0, literal.x, 2068; EG-NEXT: LSHR * T0.W, T7.Y, literal.x, 2069; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2070; EG-NEXT: BFE_INT T9.X, T7.Z, 0.0, literal.x, 2071; EG-NEXT: LSHR T0.Z, T7.W, literal.x, 2072; EG-NEXT: BFE_INT T8.W, PV.W, 0.0, literal.x, 2073; EG-NEXT: LSHR * T0.W, T7.X, literal.x, 2074; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2075; EG-NEXT: LSHR T7.X, KC0[2].Y, literal.x, 2076; EG-NEXT: BFE_INT T8.Y, PS, 0.0, literal.y, 2077; EG-NEXT: LSHR T1.Z, T7.Z, literal.y, 2078; EG-NEXT: BFE_INT T9.W, PV.Z, 0.0, literal.y, 2079; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2080; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2081; EG-NEXT: LSHR T10.X, PS, literal.x, 2082; EG-NEXT: BFE_INT * T9.Y, PV.Z, 0.0, literal.y, 2083; EG-NEXT: 2(2.802597e-45), 
16(2.242078e-44) 2084; 2085; CM-LABEL: global_sextload_v8i16_to_v8i32: 2086; CM: ; %bb.0: 2087; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 2088; CM-NEXT: TEX 0 @6 2089; CM-NEXT: ALU 19, @9, KC0[CB0:0-32], KC1[] 2090; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T9, T7.X 2091; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T8, T10.X 2092; CM-NEXT: CF_END 2093; CM-NEXT: Fetch clause starting at 6: 2094; CM-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 2095; CM-NEXT: ALU clause starting at 8: 2096; CM-NEXT: MOV * T7.X, KC0[2].Z, 2097; CM-NEXT: ALU clause starting at 9: 2098; CM-NEXT: BFE_INT * T8.Z, T7.W, 0.0, literal.x, 2099; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2100; CM-NEXT: BFE_INT T8.X, T7.Z, 0.0, literal.x, 2101; CM-NEXT: LSHR T0.Y, T7.Y, literal.x, 2102; CM-NEXT: BFE_INT T9.Z, T7.Y, 0.0, literal.x, 2103; CM-NEXT: LSHR * T0.W, T7.W, literal.x, 2104; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2105; CM-NEXT: BFE_INT T9.X, T7.X, 0.0, literal.x, 2106; CM-NEXT: LSHR T1.Y, T7.Z, literal.x, 2107; CM-NEXT: ADD_INT T0.Z, KC0[2].Y, literal.x, 2108; CM-NEXT: BFE_INT * T8.W, PV.W, 0.0, literal.x, 2109; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2110; CM-NEXT: LSHR T10.X, PV.Z, literal.x, 2111; CM-NEXT: BFE_INT T8.Y, PV.Y, 0.0, literal.y, 2112; CM-NEXT: LSHR T0.Z, T7.X, literal.y, 2113; CM-NEXT: BFE_INT * T9.W, T0.Y, 0.0, literal.y, 2114; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2115; CM-NEXT: LSHR T7.X, KC0[2].Y, literal.x, 2116; CM-NEXT: BFE_INT * T9.Y, PV.Z, 0.0, literal.y, 2117; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2118 %load = load <8 x i16>, <8 x i16> addrspace(1)* %in 2119 %ext = sext <8 x i16> %load to <8 x i32> 2120 store <8 x i32> %ext, <8 x i32> addrspace(1)* %out 2121 ret void 2122} 2123 2124define amdgpu_kernel void @global_zextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 { 2125; GCN-NOHSA-SI-LABEL: global_zextload_v16i16_to_v16i32: 2126; GCN-NOHSA-SI: ; %bb.0: 2127; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 2128; 
GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 2129; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 2130; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2 2131; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3 2132; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 2133; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6 2134; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7 2135; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 2136; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4 2137; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5 2138; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 2139; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(1) 2140; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v11, 16, v1 2141; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v9, 16, v0 2142; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v15, 16, v3 2143; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v13, 16, v2 2144; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 2145; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v19, 16, v5 2146; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v17, 16, v4 2147; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v10, 0xffff, v1 2148; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v8, 0xffff, v0 2149; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v14, 0xffff, v3 2150; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v12, 0xffff, v2 2151; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v3, 16, v7 2152; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v1, 16, v6 2153; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v18, 0xffff, v5 2154; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v16, 0xffff, v4 2155; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v2, 0xffff, v7 2156; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v6 2157; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 2158; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:32 2159; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:16 2160; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 2161; GCN-NOHSA-SI-NEXT: s_endpgm 2162; 2163; GCN-HSA-LABEL: global_zextload_v16i16_to_v16i32: 2164; GCN-HSA: ; %bb.0: 2165; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2166; GCN-HSA-NEXT: 
s_waitcnt lgkmcnt(0) 2167; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 2168; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 2169; GCN-HSA-NEXT: s_add_u32 s2, s2, 16 2170; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0 2171; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 2172; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 2173; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 2174; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] 2175; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 2176; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2177; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3 2178; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2 2179; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 2180; GCN-HSA-NEXT: v_mov_b32_e32 v17, s1 2181; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2182; GCN-HSA-NEXT: v_mov_b32_e32 v16, s0 2183; GCN-HSA-NEXT: s_add_u32 s0, s0, 32 2184; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 2185; GCN-HSA-NEXT: v_mov_b32_e32 v19, s3 2186; GCN-HSA-NEXT: v_mov_b32_e32 v18, s2 2187; GCN-HSA-NEXT: s_waitcnt vmcnt(1) 2188; GCN-HSA-NEXT: v_lshrrev_b32_e32 v11, 16, v3 2189; GCN-HSA-NEXT: v_lshrrev_b32_e32 v9, 16, v2 2190; GCN-HSA-NEXT: v_and_b32_e32 v10, 0xffff, v3 2191; GCN-HSA-NEXT: v_and_b32_e32 v8, 0xffff, v2 2192; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[8:11] 2193; GCN-HSA-NEXT: s_waitcnt vmcnt(1) 2194; GCN-HSA-NEXT: v_lshrrev_b32_e32 v13, 16, v6 2195; GCN-HSA-NEXT: v_lshrrev_b32_e32 v11, 16, v5 2196; GCN-HSA-NEXT: v_lshrrev_b32_e32 v9, 16, v4 2197; GCN-HSA-NEXT: v_and_b32_e32 v10, 0xffff, v5 2198; GCN-HSA-NEXT: v_and_b32_e32 v8, 0xffff, v4 2199; GCN-HSA-NEXT: v_and_b32_e32 v12, 0xffff, v6 2200; GCN-HSA-NEXT: v_lshrrev_b32_e32 v4, 16, v1 2201; GCN-HSA-NEXT: v_lshrrev_b32_e32 v2, 16, v0 2202; GCN-HSA-NEXT: v_and_b32_e32 v3, 0xffff, v1 2203; GCN-HSA-NEXT: v_and_b32_e32 v1, 0xffff, v0 2204; GCN-HSA-NEXT: v_mov_b32_e32 v6, s1 2205; GCN-HSA-NEXT: v_lshrrev_b32_e32 v15, 16, v7 2206; GCN-HSA-NEXT: v_and_b32_e32 v14, 0xffff, v7 2207; GCN-HSA-NEXT: v_mov_b32_e32 v5, s0 2208; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[1:4] 2209; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[12:15] 2210; 
GCN-HSA-NEXT: flat_store_dwordx4 v[5:6], v[8:11] 2211; GCN-HSA-NEXT: s_endpgm 2212; 2213; GCN-NOHSA-VI-LABEL: global_zextload_v16i16_to_v16i32: 2214; GCN-NOHSA-VI: ; %bb.0: 2215; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 2216; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 2217; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 2218; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 2219; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 2220; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 2221; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 2222; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 2223; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 2224; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 2225; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 2226; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 2227; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1) 2228; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v11, 16, v1 2229; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 2230; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v19, 16, v7 2231; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v18, 0xffff, v7 2232; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v17, 16, v6 2233; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v16, 0xffff, v6 2234; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v10, 0xffff, v1 2235; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v9, 16, v0 2236; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v8, 0xffff, v0 2237; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v15, 16, v3 2238; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v14, 0xffff, v3 2239; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v13, 16, v2 2240; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v12, 0xffff, v2 2241; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v5 2242; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v2, 0xffff, v5 2243; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v4 2244; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v4 2245; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48 2246; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 2247; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:16 2248; GCN-NOHSA-VI-NEXT: 
buffer_store_dwordx4 v[8:11], off, s[0:3], 0 2249; GCN-NOHSA-VI-NEXT: s_endpgm 2250; 2251; EG-LABEL: global_zextload_v16i16_to_v16i32: 2252; EG: ; %bb.0: 2253; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 2254; EG-NEXT: TEX 1 @8 2255; EG-NEXT: ALU 35, @13, KC0[CB0:0-32], KC1[] 2256; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T18.X, 0 2257; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T11.X, 0 2258; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T16.X, 0 2259; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T12.X, 1 2260; EG-NEXT: CF_END 2261; EG-NEXT: Fetch clause starting at 8: 2262; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 0, #1 2263; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 16, #1 2264; EG-NEXT: ALU clause starting at 12: 2265; EG-NEXT: MOV * T11.X, KC0[2].Z, 2266; EG-NEXT: ALU clause starting at 13: 2267; EG-NEXT: LSHR * T13.W, T12.Y, literal.x, 2268; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2269; EG-NEXT: AND_INT * T13.Z, T12.Y, literal.x, 2270; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2271; EG-NEXT: LSHR T13.Y, T12.X, literal.x, 2272; EG-NEXT: LSHR * T14.W, T12.W, literal.x, 2273; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2274; EG-NEXT: AND_INT T13.X, T12.X, literal.x, 2275; EG-NEXT: AND_INT T14.Z, T12.W, literal.x, 2276; EG-NEXT: LSHR * T12.X, KC0[2].Y, literal.y, 2277; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 2278; EG-NEXT: LSHR T14.Y, T12.Z, literal.x, 2279; EG-NEXT: LSHR * T15.W, T11.Y, literal.x, 2280; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2281; EG-NEXT: AND_INT T14.X, T12.Z, literal.x, 2282; EG-NEXT: AND_INT T15.Z, T11.Y, literal.x, 2283; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2284; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 2285; EG-NEXT: LSHR T16.X, PV.W, literal.x, 2286; EG-NEXT: LSHR T15.Y, T11.X, literal.y, 2287; EG-NEXT: LSHR T17.W, T11.W, literal.y, 2288; EG-NEXT: AND_INT * T15.X, T11.X, literal.z, 2289; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2290; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2291; EG-NEXT: AND_INT 
T17.Z, T11.W, literal.x, 2292; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2293; EG-NEXT: 65535(9.183409e-41), 32(4.484155e-44) 2294; EG-NEXT: LSHR T11.X, PV.W, literal.x, 2295; EG-NEXT: LSHR T17.Y, T11.Z, literal.y, 2296; EG-NEXT: AND_INT * T17.X, T11.Z, literal.z, 2297; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2298; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2299; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 2300; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 2301; EG-NEXT: LSHR * T18.X, PV.W, literal.x, 2302; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2303; 2304; CM-LABEL: global_zextload_v16i16_to_v16i32: 2305; CM: ; %bb.0: 2306; CM-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 2307; CM-NEXT: TEX 1 @8 2308; CM-NEXT: ALU 33, @13, KC0[CB0:0-32], KC1[] 2309; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T11, T18.X 2310; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T15, T17.X 2311; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T12, T16.X 2312; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T13, T14.X 2313; CM-NEXT: CF_END 2314; CM-NEXT: Fetch clause starting at 8: 2315; CM-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1 2316; CM-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1 2317; CM-NEXT: ALU clause starting at 12: 2318; CM-NEXT: MOV * T11.X, KC0[2].Z, 2319; CM-NEXT: ALU clause starting at 13: 2320; CM-NEXT: LSHR * T13.W, T12.W, literal.x, 2321; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2322; CM-NEXT: AND_INT * T13.Z, T12.W, literal.x, 2323; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2324; CM-NEXT: LSHR T13.Y, T12.Z, literal.x, 2325; CM-NEXT: LSHR * T12.W, T12.Y, literal.x, 2326; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2327; CM-NEXT: AND_INT T13.X, T12.Z, literal.x, 2328; CM-NEXT: AND_INT T12.Z, T12.Y, literal.x, 2329; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2330; CM-NEXT: 65535(9.183409e-41), 48(6.726233e-44) 2331; CM-NEXT: LSHR T14.X, PV.W, literal.x, 2332; CM-NEXT: LSHR T12.Y, T12.X, literal.y, 2333; CM-NEXT: LSHR * T15.W, T11.W, literal.y, 2334; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 
2335; CM-NEXT: AND_INT T12.X, T12.X, literal.x, 2336; CM-NEXT: AND_INT T15.Z, T11.W, literal.x, 2337; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2338; CM-NEXT: 65535(9.183409e-41), 32(4.484155e-44) 2339; CM-NEXT: LSHR T16.X, PV.W, literal.x, 2340; CM-NEXT: LSHR T15.Y, T11.Z, literal.y, 2341; CM-NEXT: LSHR * T11.W, T11.Y, literal.y, 2342; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2343; CM-NEXT: AND_INT T15.X, T11.Z, literal.x, 2344; CM-NEXT: AND_INT T11.Z, T11.Y, literal.x, 2345; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2346; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 2347; CM-NEXT: LSHR T17.X, PV.W, literal.x, 2348; CM-NEXT: LSHR * T11.Y, T11.X, literal.y, 2349; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2350; CM-NEXT: AND_INT * T11.X, T11.X, literal.x, 2351; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2352; CM-NEXT: LSHR * T18.X, KC0[2].Y, literal.x, 2353; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2354 %load = load <16 x i16>, <16 x i16> addrspace(1)* %in 2355 %ext = zext <16 x i16> %load to <16 x i32> 2356 store <16 x i32> %ext, <16 x i32> addrspace(1)* %out 2357 ret void 2358} 2359 2360define amdgpu_kernel void @global_sextload_v16i16_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 { 2361; GCN-NOHSA-SI-LABEL: global_sextload_v16i16_to_v16i32: 2362; GCN-NOHSA-SI: ; %bb.0: 2363; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 2364; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 2365; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 2366; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2 2367; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3 2368; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 2369; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6 2370; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7 2371; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 2372; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4 2373; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5 2374; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 2375; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(1) 2376; 
GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v11, 16, v1 2377; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v9, 16, v0 2378; GCN-NOHSA-SI-NEXT: v_bfe_i32 v10, v1, 0, 16 2379; GCN-NOHSA-SI-NEXT: v_bfe_i32 v8, v0, 0, 16 2380; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v15, 16, v3 2381; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v13, 16, v2 2382; GCN-NOHSA-SI-NEXT: v_bfe_i32 v14, v3, 0, 16 2383; GCN-NOHSA-SI-NEXT: v_bfe_i32 v12, v2, 0, 16 2384; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 2385; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v3, 16, v5 2386; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 16, v4 2387; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v5, 0, 16 2388; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v4, 0, 16 2389; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v19, 16, v7 2390; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v17, 16, v6 2391; GCN-NOHSA-SI-NEXT: v_bfe_i32 v18, v7, 0, 16 2392; GCN-NOHSA-SI-NEXT: v_bfe_i32 v16, v6, 0, 16 2393; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48 2394; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 2395; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:16 2396; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 2397; GCN-NOHSA-SI-NEXT: s_endpgm 2398; 2399; GCN-HSA-LABEL: global_sextload_v16i16_to_v16i32: 2400; GCN-HSA: ; %bb.0: 2401; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2402; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 2403; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 2404; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 2405; GCN-HSA-NEXT: s_add_u32 s2, s2, 16 2406; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0 2407; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 2408; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 2409; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 2410; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] 2411; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 2412; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2413; GCN-HSA-NEXT: v_mov_b32_e32 v19, s3 2414; GCN-HSA-NEXT: v_mov_b32_e32 v18, s2 2415; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 2416; GCN-HSA-NEXT: v_mov_b32_e32 v17, s1 
2417; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2418; GCN-HSA-NEXT: v_mov_b32_e32 v16, s0 2419; GCN-HSA-NEXT: s_add_u32 s0, s0, 32 2420; GCN-HSA-NEXT: v_mov_b32_e32 v21, s3 2421; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 2422; GCN-HSA-NEXT: v_mov_b32_e32 v20, s2 2423; GCN-HSA-NEXT: v_mov_b32_e32 v23, s1 2424; GCN-HSA-NEXT: v_mov_b32_e32 v22, s0 2425; GCN-HSA-NEXT: s_waitcnt vmcnt(1) 2426; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 16, v0 2427; GCN-HSA-NEXT: v_bfe_i32 v10, v1, 0, 16 2428; GCN-HSA-NEXT: v_bfe_i32 v8, v0, 0, 16 2429; GCN-HSA-NEXT: v_ashrrev_i32_e32 v15, 16, v3 2430; GCN-HSA-NEXT: v_ashrrev_i32_e32 v13, 16, v2 2431; GCN-HSA-NEXT: v_bfe_i32 v14, v3, 0, 16 2432; GCN-HSA-NEXT: v_bfe_i32 v12, v2, 0, 16 2433; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 16, v1 2434; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[12:15] 2435; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[8:11] 2436; GCN-HSA-NEXT: s_waitcnt vmcnt(2) 2437; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 16, v5 2438; GCN-HSA-NEXT: v_ashrrev_i32_e32 v10, 16, v7 2439; GCN-HSA-NEXT: v_ashrrev_i32_e32 v8, 16, v6 2440; GCN-HSA-NEXT: v_bfe_i32 v9, v7, 0, 16 2441; GCN-HSA-NEXT: v_bfe_i32 v7, v6, 0, 16 2442; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v4 2443; GCN-HSA-NEXT: v_bfe_i32 v2, v5, 0, 16 2444; GCN-HSA-NEXT: v_bfe_i32 v0, v4, 0, 16 2445; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[7:10] 2446; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[0:3] 2447; GCN-HSA-NEXT: s_endpgm 2448; 2449; GCN-NOHSA-VI-LABEL: global_sextload_v16i16_to_v16i32: 2450; GCN-NOHSA-VI: ; %bb.0: 2451; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 2452; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 2453; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 2454; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 2455; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 2456; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 2457; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 2458; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 2459; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 2460; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 
v[4:7], off, s[8:11], 0 offset:16 2461; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 2462; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 2463; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1) 2464; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v11, 16, v1 2465; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 2466; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v19, 16, v7 2467; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v17, 16, v6 2468; GCN-NOHSA-VI-NEXT: v_bfe_i32 v18, v7, 0, 16 2469; GCN-NOHSA-VI-NEXT: v_bfe_i32 v16, v6, 0, 16 2470; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v9, 16, v0 2471; GCN-NOHSA-VI-NEXT: v_bfe_i32 v10, v1, 0, 16 2472; GCN-NOHSA-VI-NEXT: v_bfe_i32 v8, v0, 0, 16 2473; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v15, 16, v3 2474; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v13, 16, v2 2475; GCN-NOHSA-VI-NEXT: v_bfe_i32 v14, v3, 0, 16 2476; GCN-NOHSA-VI-NEXT: v_bfe_i32 v12, v2, 0, 16 2477; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 16, v5 2478; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 16, v4 2479; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v5, 0, 16 2480; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v4, 0, 16 2481; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48 2482; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 2483; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:16 2484; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 2485; GCN-NOHSA-VI-NEXT: s_endpgm 2486; 2487; EG-LABEL: global_sextload_v16i16_to_v16i32: 2488; EG: ; %bb.0: 2489; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 2490; EG-NEXT: TEX 1 @8 2491; EG-NEXT: ALU 39, @13, KC0[CB0:0-32], KC1[] 2492; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T12.X, 0 2493; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T11.X, 0 2494; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T14.X, 0 2495; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T13.X, 1 2496; EG-NEXT: CF_END 2497; EG-NEXT: Fetch clause starting at 8: 2498; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1 2499; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1 
2500; EG-NEXT: ALU clause starting at 12: 2501; EG-NEXT: MOV * T11.X, KC0[2].Z, 2502; EG-NEXT: ALU clause starting at 13: 2503; EG-NEXT: LSHR T13.X, KC0[2].Y, literal.x, 2504; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2505; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2506; EG-NEXT: LSHR T14.X, PV.W, literal.x, 2507; EG-NEXT: BFE_INT * T15.Z, T11.Y, 0.0, literal.y, 2508; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2509; EG-NEXT: BFE_INT T15.X, T11.X, 0.0, literal.x, 2510; EG-NEXT: LSHR T0.Y, T12.W, literal.x, 2511; EG-NEXT: BFE_INT T16.Z, T11.W, 0.0, literal.x, BS:VEC_120/SCL_212 2512; EG-NEXT: LSHR T0.W, T12.Y, literal.x, 2513; EG-NEXT: LSHR * T1.W, T11.Y, literal.x, 2514; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2515; EG-NEXT: BFE_INT T16.X, T11.Z, 0.0, literal.x, 2516; EG-NEXT: LSHR T1.Y, T11.W, literal.x, 2517; EG-NEXT: BFE_INT T17.Z, T12.Y, 0.0, literal.x, 2518; EG-NEXT: BFE_INT T15.W, PS, 0.0, literal.x, 2519; EG-NEXT: LSHR * T1.W, T11.X, literal.x, 2520; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2521; EG-NEXT: BFE_INT T17.X, T12.X, 0.0, literal.x, 2522; EG-NEXT: BFE_INT T15.Y, PS, 0.0, literal.x, 2523; EG-NEXT: BFE_INT T18.Z, T12.W, 0.0, literal.x, 2524; EG-NEXT: BFE_INT T16.W, PV.Y, 0.0, literal.x, 2525; EG-NEXT: LSHR * T1.W, T11.Z, literal.x, 2526; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2527; EG-NEXT: BFE_INT T18.X, T12.Z, 0.0, literal.x, 2528; EG-NEXT: BFE_INT T16.Y, PS, 0.0, literal.x, 2529; EG-NEXT: LSHR T0.Z, T12.X, literal.x, 2530; EG-NEXT: BFE_INT T17.W, T0.W, 0.0, literal.x, 2531; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2532; EG-NEXT: 16(2.242078e-44), 32(4.484155e-44) 2533; EG-NEXT: LSHR T11.X, PS, literal.x, 2534; EG-NEXT: BFE_INT T17.Y, PV.Z, 0.0, literal.y, 2535; EG-NEXT: LSHR T0.Z, T12.Z, literal.y, 2536; EG-NEXT: BFE_INT T18.W, T0.Y, 0.0, literal.y, 2537; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 2538; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2539; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 2540; EG-NEXT: LSHR T12.X, PS, 
literal.x, 2541; EG-NEXT: BFE_INT * T18.Y, PV.Z, 0.0, literal.y, 2542; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2543; 2544; CM-LABEL: global_sextload_v16i16_to_v16i32: 2545; CM: ; %bb.0: 2546; CM-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 2547; CM-NEXT: TEX 1 @8 2548; CM-NEXT: ALU 40, @13, KC0[CB0:0-32], KC1[] 2549; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T17, T11.X 2550; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T12, T18.X 2551; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T16, T14.X 2552; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T15, T13.X 2553; CM-NEXT: CF_END 2554; CM-NEXT: Fetch clause starting at 8: 2555; CM-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1 2556; CM-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1 2557; CM-NEXT: ALU clause starting at 12: 2558; CM-NEXT: MOV * T11.X, KC0[2].Z, 2559; CM-NEXT: ALU clause starting at 13: 2560; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 2561; CM-NEXT: 48(6.726233e-44), 0(0.000000e+00) 2562; CM-NEXT: LSHR T13.X, PV.W, literal.x, 2563; CM-NEXT: LSHR T0.Y, T11.Y, literal.y, 2564; CM-NEXT: LSHR T0.Z, T11.Z, literal.y, 2565; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 2566; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2567; CM-NEXT: 32(4.484155e-44), 0(0.000000e+00) 2568; CM-NEXT: LSHR T14.X, PV.W, literal.x, 2569; CM-NEXT: LSHR T1.Y, T11.W, literal.y, 2570; CM-NEXT: BFE_INT T15.Z, T12.W, 0.0, literal.y, BS:VEC_120/SCL_212 2571; CM-NEXT: LSHR * T0.W, T12.X, literal.y, 2572; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2573; CM-NEXT: BFE_INT T15.X, T12.Z, 0.0, literal.x, 2574; CM-NEXT: LSHR T2.Y, T12.Y, literal.x, 2575; CM-NEXT: BFE_INT T16.Z, T12.Y, 0.0, literal.x, 2576; CM-NEXT: LSHR * T1.W, T12.W, literal.x, 2577; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2578; CM-NEXT: BFE_INT T16.X, T12.X, 0.0, literal.x, 2579; CM-NEXT: LSHR T3.Y, T12.Z, literal.x, 2580; CM-NEXT: BFE_INT T12.Z, T11.W, 0.0, literal.x, 2581; CM-NEXT: BFE_INT * T15.W, PV.W, 0.0, literal.x, 2582; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2583; CM-NEXT: BFE_INT T12.X, T11.Z, 
0.0, literal.x, 2584; CM-NEXT: BFE_INT T15.Y, PV.Y, 0.0, literal.x, 2585; CM-NEXT: BFE_INT T17.Z, T11.Y, 0.0, literal.x, 2586; CM-NEXT: BFE_INT * T16.W, T2.Y, 0.0, literal.x, BS:VEC_120/SCL_212 2587; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2588; CM-NEXT: BFE_INT T17.X, T11.X, 0.0, literal.x, 2589; CM-NEXT: BFE_INT T16.Y, T0.W, 0.0, literal.x, 2590; CM-NEXT: ADD_INT T1.Z, KC0[2].Y, literal.x, 2591; CM-NEXT: BFE_INT * T12.W, T1.Y, 0.0, literal.x, 2592; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2593; CM-NEXT: LSHR T18.X, PV.Z, literal.x, 2594; CM-NEXT: BFE_INT T12.Y, T0.Z, 0.0, literal.y, 2595; CM-NEXT: LSHR T0.Z, T11.X, literal.y, 2596; CM-NEXT: BFE_INT * T17.W, T0.Y, 0.0, literal.y, 2597; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2598; CM-NEXT: LSHR T11.X, KC0[2].Y, literal.x, 2599; CM-NEXT: BFE_INT * T17.Y, PV.Z, 0.0, literal.y, 2600; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2601 %load = load <16 x i16>, <16 x i16> addrspace(1)* %in 2602 %ext = sext <16 x i16> %load to <16 x i32> 2603 store <16 x i32> %ext, <16 x i32> addrspace(1)* %out 2604 ret void 2605} 2606 2607define amdgpu_kernel void @global_zextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 { 2608; GCN-NOHSA-SI-LABEL: global_zextload_v32i16_to_v32i32: 2609; GCN-NOHSA-SI: ; %bb.0: 2610; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 2611; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 2612; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 2613; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2 2614; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3 2615; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 2616; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6 2617; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7 2618; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 2619; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 2620; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32 2621; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48 2622; 
GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(3) 2623; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v19, 16, v3 2624; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v17, 16, v2 2625; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v23, 16, v1 2626; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v21, 16, v0 2627; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(2) 2628; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v27, 16, v7 2629; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v25, 16, v6 2630; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v18, 0xffff, v3 2631; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v16, 0xffff, v2 2632; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v22, 0xffff, v1 2633; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v20, 0xffff, v0 2634; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v3, 16, v5 2635; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v1, 16, v4 2636; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v26, 0xffff, v7 2637; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v24, 0xffff, v6 2638; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v2, 0xffff, v5 2639; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v4 2640; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(1) 2641; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v7, 16, v11 2642; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v5, 16, v10 2643; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v31, 16, v9 2644; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v29, 16, v8 2645; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v6, 0xffff, v11 2646; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v4, 0xffff, v10 2647; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v30, 0xffff, v9 2648; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v28, 0xffff, v8 2649; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 2650; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v11, 16, v15 2651; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v9, 16, v14 2652; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v35, 16, v13 2653; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v33, 16, v12 2654; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v10, 0xffff, v15 2655; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v8, 0xffff, v14 2656; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v34, 0xffff, v13 2657; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v32, 0xffff, v12 2658; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4 2659; GCN-NOHSA-SI-NEXT: 
s_mov_b32 s1, s5 2660; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:96 2661; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112 2662; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:64 2663; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:80 2664; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 2665; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:48 2666; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 2667; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:16 2668; GCN-NOHSA-SI-NEXT: s_endpgm 2669; 2670; GCN-HSA-LABEL: global_zextload_v32i16_to_v32i32: 2671; GCN-HSA: ; %bb.0: 2672; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 2673; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 2674; GCN-HSA-NEXT: s_add_u32 s4, s2, 16 2675; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 2676; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 2677; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 2678; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 2679; GCN-HSA-NEXT: s_add_u32 s4, s2, 32 2680; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 2681; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 2682; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 2683; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] 2684; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 2685; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2 2686; GCN-HSA-NEXT: s_add_u32 s2, s2, 48 2687; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0 2688; GCN-HSA-NEXT: flat_load_dwordx4 v[8:11], v[8:9] 2689; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3 2690; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2 2691; GCN-HSA-NEXT: flat_load_dwordx4 v[12:15], v[12:13] 2692; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 2693; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 2694; GCN-HSA-NEXT: s_add_u32 s4, s0, 0x60 2695; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 2696; GCN-HSA-NEXT: s_add_u32 s6, s0, 0x70 2697; GCN-HSA-NEXT: s_addc_u32 s7, s1, 0 2698; GCN-HSA-NEXT: s_add_u32 s8, s0, 64 2699; GCN-HSA-NEXT: s_addc_u32 s9, s1, 0 2700; 
GCN-HSA-NEXT: s_add_u32 s10, s0, 0x50 2701; GCN-HSA-NEXT: s_addc_u32 s11, s1, 0 2702; GCN-HSA-NEXT: s_add_u32 s12, s0, 32 2703; GCN-HSA-NEXT: s_addc_u32 s13, s1, 0 2704; GCN-HSA-NEXT: v_mov_b32_e32 v21, s3 2705; GCN-HSA-NEXT: v_mov_b32_e32 v20, s2 2706; GCN-HSA-NEXT: v_mov_b32_e32 v23, s7 2707; GCN-HSA-NEXT: v_mov_b32_e32 v22, s6 2708; GCN-HSA-NEXT: s_waitcnt vmcnt(3) 2709; GCN-HSA-NEXT: v_lshrrev_b32_e32 v19, 16, v1 2710; GCN-HSA-NEXT: v_lshrrev_b32_e32 v17, 16, v0 2711; GCN-HSA-NEXT: v_and_b32_e32 v18, 0xffff, v1 2712; GCN-HSA-NEXT: v_and_b32_e32 v16, 0xffff, v0 2713; GCN-HSA-NEXT: v_mov_b32_e32 v0, s12 2714; GCN-HSA-NEXT: v_mov_b32_e32 v1, s13 2715; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[16:19] 2716; GCN-HSA-NEXT: v_mov_b32_e32 v0, s8 2717; GCN-HSA-NEXT: s_waitcnt vmcnt(3) 2718; GCN-HSA-NEXT: v_lshrrev_b32_e32 v19, 16, v5 2719; GCN-HSA-NEXT: v_lshrrev_b32_e32 v17, 16, v4 2720; GCN-HSA-NEXT: v_and_b32_e32 v18, 0xffff, v5 2721; GCN-HSA-NEXT: v_and_b32_e32 v16, 0xffff, v4 2722; GCN-HSA-NEXT: v_mov_b32_e32 v1, s9 2723; GCN-HSA-NEXT: v_mov_b32_e32 v4, s10 2724; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[16:19] 2725; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 2726; GCN-HSA-NEXT: v_lshrrev_b32_e32 v19, 16, v7 2727; GCN-HSA-NEXT: v_lshrrev_b32_e32 v17, 16, v6 2728; GCN-HSA-NEXT: v_and_b32_e32 v18, 0xffff, v7 2729; GCN-HSA-NEXT: v_and_b32_e32 v16, 0xffff, v6 2730; GCN-HSA-NEXT: v_mov_b32_e32 v5, s11 2731; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 2732; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[16:19] 2733; GCN-HSA-NEXT: s_waitcnt vmcnt(4) 2734; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v9 2735; GCN-HSA-NEXT: v_lshrrev_b32_e32 v5, 16, v8 2736; GCN-HSA-NEXT: v_and_b32_e32 v6, 0xffff, v9 2737; GCN-HSA-NEXT: v_and_b32_e32 v4, 0xffff, v8 2738; GCN-HSA-NEXT: s_add_u32 s0, s0, 48 2739; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[4:7] 2740; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 2741; GCN-HSA-NEXT: v_lshrrev_b32_e32 v19, 16, v11 2742; GCN-HSA-NEXT: v_lshrrev_b32_e32 v17, 16, v10 2743; 
GCN-HSA-NEXT: v_and_b32_e32 v18, 0xffff, v11 2744; GCN-HSA-NEXT: v_and_b32_e32 v16, 0xffff, v10 2745; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 2746; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 2747; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[16:19] 2748; GCN-HSA-NEXT: s_waitcnt vmcnt(5) 2749; GCN-HSA-NEXT: v_lshrrev_b32_e32 v10, 16, v15 2750; GCN-HSA-NEXT: v_lshrrev_b32_e32 v18, 16, v13 2751; GCN-HSA-NEXT: v_lshrrev_b32_e32 v16, 16, v12 2752; GCN-HSA-NEXT: v_and_b32_e32 v9, 0xffff, v15 2753; GCN-HSA-NEXT: v_and_b32_e32 v17, 0xffff, v13 2754; GCN-HSA-NEXT: v_and_b32_e32 v15, 0xffff, v12 2755; GCN-HSA-NEXT: v_mov_b32_e32 v12, s1 2756; GCN-HSA-NEXT: v_lshrrev_b32_e32 v6, 16, v3 2757; GCN-HSA-NEXT: v_lshrrev_b32_e32 v4, 16, v2 2758; GCN-HSA-NEXT: v_and_b32_e32 v5, 0xffff, v3 2759; GCN-HSA-NEXT: v_and_b32_e32 v3, 0xffff, v2 2760; GCN-HSA-NEXT: v_lshrrev_b32_e32 v8, 16, v14 2761; GCN-HSA-NEXT: v_and_b32_e32 v7, 0xffff, v14 2762; GCN-HSA-NEXT: v_mov_b32_e32 v11, s0 2763; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[15:18] 2764; GCN-HSA-NEXT: flat_store_dwordx4 v[22:23], v[7:10] 2765; GCN-HSA-NEXT: flat_store_dwordx4 v[11:12], v[3:6] 2766; GCN-HSA-NEXT: s_endpgm 2767; 2768; GCN-NOHSA-VI-LABEL: global_zextload_v32i16_to_v32i32: 2769; GCN-NOHSA-VI: ; %bb.0: 2770; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 2771; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 2772; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 2773; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 2774; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 2775; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 2776; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 2777; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 2778; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 2779; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 2780; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32 2781; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48 2782; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 2783; GCN-NOHSA-VI-NEXT: 
s_mov_b32 s1, s5 2784; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(3) 2785; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v19, 16, v3 2786; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v17, 16, v2 2787; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v23, 16, v1 2788; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 2789; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v35, 16, v13 2790; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v33, 16, v12 2791; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v34, 0xffff, v13 2792; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v32, 0xffff, v12 2793; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v21, 16, v0 2794; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v27, 16, v7 2795; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v25, 16, v6 2796; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v18, 0xffff, v3 2797; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v16, 0xffff, v2 2798; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v22, 0xffff, v1 2799; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v20, 0xffff, v0 2800; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v5 2801; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v4 2802; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v26, 0xffff, v7 2803; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v24, 0xffff, v6 2804; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v2, 0xffff, v5 2805; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v4 2806; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v7, 16, v11 2807; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v5, 16, v10 2808; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v31, 16, v9 2809; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v29, 16, v8 2810; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v6, 0xffff, v11 2811; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v4, 0xffff, v10 2812; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v30, 0xffff, v9 2813; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v28, 0xffff, v8 2814; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v11, 16, v15 2815; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v9, 16, v14 2816; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v10, 0xffff, v15 2817; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v8, 0xffff, v14 2818; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:96 2819; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 
v[8:11], off, s[0:3], 0 offset:112 2820; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:64 2821; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:80 2822; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 2823; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:48 2824; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 2825; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:16 2826; GCN-NOHSA-VI-NEXT: s_endpgm 2827; 2828; EG-LABEL: global_zextload_v32i16_to_v32i32: 2829; EG: ; %bb.0: 2830; EG-NEXT: ALU 0, @20, KC0[CB0:0-32], KC1[] 2831; EG-NEXT: TEX 3 @12 2832; EG-NEXT: ALU 72, @21, KC0[CB0:0-32], KC1[] 2833; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T34.X, 0 2834; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T33.X, 0 2835; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T32.X, 0 2836; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T30.X, 0 2837; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T29.X, 0 2838; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T27.X, 0 2839; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T26.X, 0 2840; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T24.X, 1 2841; EG-NEXT: CF_END 2842; EG-NEXT: Fetch clause starting at 12: 2843; EG-NEXT: VTX_READ_128 T20.XYZW, T19.X, 0, #1 2844; EG-NEXT: VTX_READ_128 T21.XYZW, T19.X, 48, #1 2845; EG-NEXT: VTX_READ_128 T22.XYZW, T19.X, 32, #1 2846; EG-NEXT: VTX_READ_128 T19.XYZW, T19.X, 16, #1 2847; EG-NEXT: ALU clause starting at 20: 2848; EG-NEXT: MOV * T19.X, KC0[2].Z, 2849; EG-NEXT: ALU clause starting at 21: 2850; EG-NEXT: LSHR * T23.W, T20.W, literal.x, 2851; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2852; EG-NEXT: AND_INT * T23.Z, T20.W, literal.x, 2853; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2854; EG-NEXT: LSHR T23.Y, T20.Z, literal.x, 2855; EG-NEXT: LSHR * T20.W, T20.Y, literal.x, 2856; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2857; EG-NEXT: AND_INT T23.X, 
T20.Z, literal.x, 2858; EG-NEXT: AND_INT T20.Z, T20.Y, literal.x, 2859; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2860; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 2861; EG-NEXT: LSHR T24.X, PV.W, literal.x, 2862; EG-NEXT: LSHR T20.Y, T20.X, literal.y, 2863; EG-NEXT: LSHR T25.W, T19.W, literal.y, 2864; EG-NEXT: AND_INT * T20.X, T20.X, literal.z, 2865; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2866; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2867; EG-NEXT: AND_INT * T25.Z, T19.W, literal.x, 2868; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2869; EG-NEXT: LSHR T26.X, KC0[2].Y, literal.x, 2870; EG-NEXT: LSHR T25.Y, T19.Z, literal.y, 2871; EG-NEXT: LSHR T19.W, T19.Y, literal.y, 2872; EG-NEXT: AND_INT * T25.X, T19.Z, literal.z, 2873; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2874; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2875; EG-NEXT: AND_INT T19.Z, T19.Y, literal.x, 2876; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2877; EG-NEXT: 65535(9.183409e-41), 48(6.726233e-44) 2878; EG-NEXT: LSHR T27.X, PV.W, literal.x, 2879; EG-NEXT: LSHR T19.Y, T19.X, literal.y, 2880; EG-NEXT: LSHR T28.W, T22.W, literal.y, 2881; EG-NEXT: AND_INT * T19.X, T19.X, literal.z, 2882; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2883; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2884; EG-NEXT: AND_INT T28.Z, T22.W, literal.x, 2885; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2886; EG-NEXT: 65535(9.183409e-41), 32(4.484155e-44) 2887; EG-NEXT: LSHR T29.X, PV.W, literal.x, 2888; EG-NEXT: LSHR T28.Y, T22.Z, literal.y, 2889; EG-NEXT: LSHR T22.W, T22.Y, literal.y, 2890; EG-NEXT: AND_INT * T28.X, T22.Z, literal.z, 2891; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2892; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2893; EG-NEXT: AND_INT T22.Z, T22.Y, literal.x, 2894; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2895; EG-NEXT: 65535(9.183409e-41), 80(1.121039e-43) 2896; EG-NEXT: LSHR T30.X, PV.W, literal.x, 2897; EG-NEXT: LSHR T22.Y, T22.X, literal.y, 2898; EG-NEXT: LSHR T31.W, T21.W, 
literal.y, 2899; EG-NEXT: AND_INT * T22.X, T22.X, literal.z, 2900; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2901; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2902; EG-NEXT: AND_INT T31.Z, T21.W, literal.x, 2903; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2904; EG-NEXT: 65535(9.183409e-41), 64(8.968310e-44) 2905; EG-NEXT: LSHR T32.X, PV.W, literal.x, 2906; EG-NEXT: LSHR T31.Y, T21.Z, literal.y, 2907; EG-NEXT: LSHR T21.W, T21.Y, literal.y, 2908; EG-NEXT: AND_INT * T31.X, T21.Z, literal.z, 2909; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2910; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2911; EG-NEXT: AND_INT T21.Z, T21.Y, literal.x, 2912; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2913; EG-NEXT: 65535(9.183409e-41), 112(1.569454e-43) 2914; EG-NEXT: LSHR T33.X, PV.W, literal.x, 2915; EG-NEXT: LSHR T21.Y, T21.X, literal.y, 2916; EG-NEXT: AND_INT * T21.X, T21.X, literal.z, 2917; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2918; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2919; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 2920; EG-NEXT: 96(1.345247e-43), 0(0.000000e+00) 2921; EG-NEXT: LSHR * T34.X, PV.W, literal.x, 2922; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2923; 2924; CM-LABEL: global_zextload_v32i16_to_v32i32: 2925; CM: ; %bb.0: 2926; CM-NEXT: ALU 0, @20, KC0[CB0:0-32], KC1[] 2927; CM-NEXT: TEX 3 @12 2928; CM-NEXT: ALU 65, @21, KC0[CB0:0-32], KC1[] 2929; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T33, T34.X 2930; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T31, T21.X 2931; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T30, T32.X 2932; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T28, T22.X 2933; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T27, T29.X 2934; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T25, T19.X 2935; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T24, T26.X 2936; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T23, T20.X 2937; CM-NEXT: CF_END 2938; CM-NEXT: Fetch clause starting at 12: 2939; CM-NEXT: VTX_READ_128 T20.XYZW, T19.X, 48, #1 2940; CM-NEXT: VTX_READ_128 T21.XYZW, T19.X, 0, 
#1 2941; CM-NEXT: VTX_READ_128 T22.XYZW, T19.X, 16, #1 2942; CM-NEXT: VTX_READ_128 T19.XYZW, T19.X, 32, #1 2943; CM-NEXT: ALU clause starting at 20: 2944; CM-NEXT: MOV * T19.X, KC0[2].Z, 2945; CM-NEXT: ALU clause starting at 21: 2946; CM-NEXT: LSHR * T23.W, T20.Y, literal.x, 2947; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2948; CM-NEXT: AND_INT * T23.Z, T20.Y, literal.x, 2949; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 2950; CM-NEXT: LSHR T23.Y, T20.X, literal.x, 2951; CM-NEXT: LSHR * T24.W, T20.W, literal.x, 2952; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 2953; CM-NEXT: AND_INT T23.X, T20.X, literal.x, 2954; CM-NEXT: AND_INT T24.Z, T20.W, literal.x, 2955; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2956; CM-NEXT: 65535(9.183409e-41), 96(1.345247e-43) 2957; CM-NEXT: LSHR T20.X, PV.W, literal.x, 2958; CM-NEXT: LSHR T24.Y, T20.Z, literal.y, 2959; CM-NEXT: LSHR * T25.W, T19.Y, literal.y, 2960; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2961; CM-NEXT: AND_INT T24.X, T20.Z, literal.x, 2962; CM-NEXT: AND_INT T25.Z, T19.Y, literal.x, 2963; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2964; CM-NEXT: 65535(9.183409e-41), 112(1.569454e-43) 2965; CM-NEXT: LSHR T26.X, PV.W, literal.x, 2966; CM-NEXT: LSHR T25.Y, T19.X, literal.y, 2967; CM-NEXT: LSHR * T27.W, T19.W, literal.y, 2968; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2969; CM-NEXT: AND_INT T25.X, T19.X, literal.x, 2970; CM-NEXT: AND_INT T27.Z, T19.W, literal.x, 2971; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2972; CM-NEXT: 65535(9.183409e-41), 64(8.968310e-44) 2973; CM-NEXT: LSHR T19.X, PV.W, literal.x, 2974; CM-NEXT: LSHR T27.Y, T19.Z, literal.y, 2975; CM-NEXT: LSHR * T28.W, T22.Y, literal.y, 2976; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2977; CM-NEXT: AND_INT T27.X, T19.Z, literal.x, 2978; CM-NEXT: AND_INT T28.Z, T22.Y, literal.x, 2979; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2980; CM-NEXT: 65535(9.183409e-41), 80(1.121039e-43) 2981; CM-NEXT: LSHR T29.X, PV.W, literal.x, 2982; CM-NEXT: LSHR T28.Y, 
T22.X, literal.y, 2983; CM-NEXT: LSHR * T30.W, T22.W, literal.y, 2984; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2985; CM-NEXT: AND_INT T28.X, T22.X, literal.x, 2986; CM-NEXT: AND_INT T30.Z, T22.W, literal.x, 2987; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2988; CM-NEXT: 65535(9.183409e-41), 32(4.484155e-44) 2989; CM-NEXT: LSHR T22.X, PV.W, literal.x, 2990; CM-NEXT: LSHR T30.Y, T22.Z, literal.y, 2991; CM-NEXT: LSHR * T31.W, T21.Y, literal.y, 2992; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 2993; CM-NEXT: AND_INT T30.X, T22.Z, literal.x, 2994; CM-NEXT: AND_INT T31.Z, T21.Y, literal.x, 2995; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 2996; CM-NEXT: 65535(9.183409e-41), 48(6.726233e-44) 2997; CM-NEXT: LSHR T32.X, PV.W, literal.x, 2998; CM-NEXT: LSHR T31.Y, T21.X, literal.y, 2999; CM-NEXT: LSHR * T33.W, T21.W, literal.y, 3000; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3001; CM-NEXT: AND_INT T31.X, T21.X, literal.x, 3002; CM-NEXT: AND_INT * T33.Z, T21.W, literal.x, 3003; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3004; CM-NEXT: LSHR T21.X, KC0[2].Y, literal.x, 3005; CM-NEXT: LSHR * T33.Y, T21.Z, literal.y, 3006; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3007; CM-NEXT: AND_INT T33.X, T21.Z, literal.x, 3008; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3009; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 3010; CM-NEXT: LSHR * T34.X, PV.W, literal.x, 3011; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 3012 %load = load <32 x i16>, <32 x i16> addrspace(1)* %in 3013 %ext = zext <32 x i16> %load to <32 x i32> 3014 store <32 x i32> %ext, <32 x i32> addrspace(1)* %out 3015 ret void 3016} 3017; Sign-extending counterpart of global_zextload_v32i16_to_v32i32. The IR loads <32 x i16> from %in, sign-extends every lane to i32 and stores the <32 x i32> result to %out. The GCN expectations form each high half with v_ashrrev_i32 by 16 and each low half with v_bfe_i32 (offset 0, width 16). The r600 expectations (EG, CM prefixes) use LSHR by 16 plus BFE_INT. Check lines are autogenerated by utils/update_llc_test_checks.py, so regenerate them instead of editing by hand. 3018define amdgpu_kernel void @global_sextload_v32i16_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 { 3019; GCN-NOHSA-SI-LABEL: global_sextload_v32i16_to_v32i32: 3020; GCN-NOHSA-SI: ; %bb.0: 3021; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 3022; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 3023; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 3024; 
GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2 3025; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3 3026; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 3027; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6 3028; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7 3029; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 3030; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 3031; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32 3032; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48 3033; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(3) 3034; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v19, 16, v3 3035; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v17, 16, v2 3036; GCN-NOHSA-SI-NEXT: v_bfe_i32 v18, v3, 0, 16 3037; GCN-NOHSA-SI-NEXT: v_bfe_i32 v16, v2, 0, 16 3038; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v23, 16, v1 3039; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v21, 16, v0 3040; GCN-NOHSA-SI-NEXT: v_bfe_i32 v22, v1, 0, 16 3041; GCN-NOHSA-SI-NEXT: v_bfe_i32 v20, v0, 0, 16 3042; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(2) 3043; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v3, 16, v7 3044; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 16, v6 3045; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v7, 0, 16 3046; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v6, 0, 16 3047; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v27, 16, v5 3048; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v25, 16, v4 3049; GCN-NOHSA-SI-NEXT: v_bfe_i32 v26, v5, 0, 16 3050; GCN-NOHSA-SI-NEXT: v_bfe_i32 v24, v4, 0, 16 3051; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(1) 3052; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v7, 16, v11 3053; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v5, 16, v10 3054; GCN-NOHSA-SI-NEXT: v_bfe_i32 v6, v11, 0, 16 3055; GCN-NOHSA-SI-NEXT: v_bfe_i32 v4, v10, 0, 16 3056; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v31, 16, v9 3057; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v29, 16, v8 3058; GCN-NOHSA-SI-NEXT: v_bfe_i32 v30, v9, 0, 16 3059; GCN-NOHSA-SI-NEXT: v_bfe_i32 v28, v8, 0, 16 3060; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 3061; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v11, 
16, v15 3062; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v9, 16, v14 3063; GCN-NOHSA-SI-NEXT: v_bfe_i32 v10, v15, 0, 16 3064; GCN-NOHSA-SI-NEXT: v_bfe_i32 v8, v14, 0, 16 3065; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v35, 16, v13 3066; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v33, 16, v12 3067; GCN-NOHSA-SI-NEXT: v_bfe_i32 v34, v13, 0, 16 3068; GCN-NOHSA-SI-NEXT: v_bfe_i32 v32, v12, 0, 16 3069; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4 3070; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5 3071; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:96 3072; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112 3073; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:64 3074; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:80 3075; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:32 3076; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 3077; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 3078; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:16 3079; GCN-NOHSA-SI-NEXT: s_endpgm 3080; 3081; GCN-HSA-LABEL: global_sextload_v32i16_to_v32i32: 3082; GCN-HSA: ; %bb.0: 3083; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3084; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 3085; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 3086; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 3087; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 3088; GCN-HSA-NEXT: s_add_u32 s4, s2, 48 3089; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 3090; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 3091; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 3092; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] 3093; GCN-HSA-NEXT: s_add_u32 s4, s2, 32 3094; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 3095; GCN-HSA-NEXT: s_add_u32 s2, s2, 16 3096; GCN-HSA-NEXT: v_mov_b32_e32 v9, s5 3097; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0 3098; GCN-HSA-NEXT: v_mov_b32_e32 v8, s4 3099; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3 3100; GCN-HSA-NEXT: flat_load_dwordx4 v[8:11], 
v[8:9] 3101; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2 3102; GCN-HSA-NEXT: flat_load_dwordx4 v[12:15], v[12:13] 3103; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 3104; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3105; GCN-HSA-NEXT: s_waitcnt vmcnt(3) 3106; GCN-HSA-NEXT: v_ashrrev_i32_e32 v19, 16, v1 3107; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 16, v0 3108; GCN-HSA-NEXT: v_bfe_i32 v18, v1, 0, 16 3109; GCN-HSA-NEXT: v_bfe_i32 v16, v0, 0, 16 3110; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 3111; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 3112; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[16:19] 3113; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 3114; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 3115; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60 3116; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3117; GCN-HSA-NEXT: v_mov_b32_e32 v21, s3 3118; GCN-HSA-NEXT: v_mov_b32_e32 v20, s2 3119; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70 3120; GCN-HSA-NEXT: v_ashrrev_i32_e32 v19, 16, v3 3121; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 16, v2 3122; GCN-HSA-NEXT: v_bfe_i32 v18, v3, 0, 16 3123; GCN-HSA-NEXT: v_bfe_i32 v16, v2, 0, 16 3124; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3125; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[16:19] 3126; GCN-HSA-NEXT: s_waitcnt vmcnt(4) 3127; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 16, v5 3128; GCN-HSA-NEXT: v_mov_b32_e32 v17, s3 3129; GCN-HSA-NEXT: v_mov_b32_e32 v16, s2 3130; GCN-HSA-NEXT: s_add_u32 s2, s0, 64 3131; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3132; GCN-HSA-NEXT: v_mov_b32_e32 v19, s3 3133; GCN-HSA-NEXT: v_mov_b32_e32 v18, s2 3134; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x50 3135; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v4 3136; GCN-HSA-NEXT: v_bfe_i32 v2, v5, 0, 16 3137; GCN-HSA-NEXT: v_bfe_i32 v0, v4, 0, 16 3138; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3139; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[0:3] 3140; GCN-HSA-NEXT: v_mov_b32_e32 v21, s3 3141; GCN-HSA-NEXT: v_mov_b32_e32 v20, s2 3142; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 3143; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3144; GCN-HSA-NEXT: s_add_u32 s0, s0, 48 3145; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 
16, v7 3146; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v6 3147; GCN-HSA-NEXT: v_bfe_i32 v2, v7, 0, 16 3148; GCN-HSA-NEXT: v_bfe_i32 v0, v6, 0, 16 3149; GCN-HSA-NEXT: s_waitcnt vmcnt(4) 3150; GCN-HSA-NEXT: v_ashrrev_i32_e32 v7, 16, v9 3151; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 16, v8 3152; GCN-HSA-NEXT: v_bfe_i32 v6, v9, 0, 16 3153; GCN-HSA-NEXT: v_bfe_i32 v4, v8, 0, 16 3154; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 3155; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 3156; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[0:3] 3157; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2 3158; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 16, v11 3159; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v10 3160; GCN-HSA-NEXT: v_bfe_i32 v2, v11, 0, 16 3161; GCN-HSA-NEXT: v_bfe_i32 v0, v10, 0, 16 3162; GCN-HSA-NEXT: v_mov_b32_e32 v11, s1 3163; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[4:7] 3164; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[0:3] 3165; GCN-HSA-NEXT: s_waitcnt vmcnt(6) 3166; GCN-HSA-NEXT: v_ashrrev_i32_e32 v7, 16, v13 3167; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 16, v12 3168; GCN-HSA-NEXT: v_bfe_i32 v6, v13, 0, 16 3169; GCN-HSA-NEXT: v_bfe_i32 v4, v12, 0, 16 3170; GCN-HSA-NEXT: v_mov_b32_e32 v10, s0 3171; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 16, v15 3172; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v14 3173; GCN-HSA-NEXT: v_bfe_i32 v2, v15, 0, 16 3174; GCN-HSA-NEXT: v_bfe_i32 v0, v14, 0, 16 3175; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[4:7] 3176; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[0:3] 3177; GCN-HSA-NEXT: s_endpgm 3178; 3179; GCN-NOHSA-VI-LABEL: global_sextload_v32i16_to_v32i32: 3180; GCN-NOHSA-VI: ; %bb.0: 3181; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 3182; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 3183; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 3184; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 3185; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 3186; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 3187; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 3188; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 3189; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 
v[0:3], off, s[8:11], 0 3190; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 3191; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 offset:32 3192; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48 3193; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 3194; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 3195; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(3) 3196; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v19, 16, v3 3197; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v17, 16, v2 3198; GCN-NOHSA-VI-NEXT: v_bfe_i32 v18, v3, 0, 16 3199; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 3200; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v35, 16, v13 3201; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v33, 16, v12 3202; GCN-NOHSA-VI-NEXT: v_bfe_i32 v34, v13, 0, 16 3203; GCN-NOHSA-VI-NEXT: v_bfe_i32 v32, v12, 0, 16 3204; GCN-NOHSA-VI-NEXT: v_bfe_i32 v16, v2, 0, 16 3205; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v23, 16, v1 3206; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v21, 16, v0 3207; GCN-NOHSA-VI-NEXT: v_bfe_i32 v22, v1, 0, 16 3208; GCN-NOHSA-VI-NEXT: v_bfe_i32 v20, v0, 0, 16 3209; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 16, v7 3210; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 16, v6 3211; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v7, 0, 16 3212; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v6, 0, 16 3213; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v27, 16, v5 3214; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v25, 16, v4 3215; GCN-NOHSA-VI-NEXT: v_bfe_i32 v26, v5, 0, 16 3216; GCN-NOHSA-VI-NEXT: v_bfe_i32 v24, v4, 0, 16 3217; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 16, v11 3218; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v5, 16, v10 3219; GCN-NOHSA-VI-NEXT: v_bfe_i32 v6, v11, 0, 16 3220; GCN-NOHSA-VI-NEXT: v_bfe_i32 v4, v10, 0, 16 3221; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v31, 16, v9 3222; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v29, 16, v8 3223; GCN-NOHSA-VI-NEXT: v_bfe_i32 v30, v9, 0, 16 3224; GCN-NOHSA-VI-NEXT: v_bfe_i32 v28, v8, 0, 16 3225; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v11, 16, v15 3226; GCN-NOHSA-VI-NEXT: 
v_ashrrev_i32_e32 v9, 16, v14 3227; GCN-NOHSA-VI-NEXT: v_bfe_i32 v10, v15, 0, 16 3228; GCN-NOHSA-VI-NEXT: v_bfe_i32 v8, v14, 0, 16 3229; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:96 3230; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112 3231; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:64 3232; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:80 3233; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:32 3234; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 3235; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 3236; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:16 3237; GCN-NOHSA-VI-NEXT: s_endpgm 3238; 3239; EG-LABEL: global_sextload_v32i16_to_v32i32: 3240; EG: ; %bb.0: 3241; EG-NEXT: ALU 9, @20, KC0[CB0:0-32], KC1[] 3242; EG-NEXT: TEX 3 @12 3243; EG-NEXT: ALU 73, @30, KC0[CB0:0-32], KC1[] 3244; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T22.X, 0 3245; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T34.X, 0 3246; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T28.X, 0 3247; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T27.X, 0 3248; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T26.X, 0 3249; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T21.X, 0 3250; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T20.X, 0 3251; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T19.X, 1 3252; EG-NEXT: CF_END 3253; EG-NEXT: Fetch clause starting at 12: 3254; EG-NEXT: VTX_READ_128 T23.XYZW, T22.X, 16, #1 3255; EG-NEXT: VTX_READ_128 T24.XYZW, T22.X, 32, #1 3256; EG-NEXT: VTX_READ_128 T25.XYZW, T22.X, 0, #1 3257; EG-NEXT: VTX_READ_128 T22.XYZW, T22.X, 48, #1 3258; EG-NEXT: ALU clause starting at 20: 3259; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 3260; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3261; EG-NEXT: LSHR T19.X, PV.W, literal.x, 3262; EG-NEXT: LSHR * T20.X, KC0[2].Y, 
literal.x, 3263; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 3264; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 3265; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 3266; EG-NEXT: LSHR T21.X, PV.W, literal.x, 3267; EG-NEXT: MOV * T22.X, KC0[2].Z, 3268; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 3269; EG-NEXT: ALU clause starting at 30: 3270; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 3271; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) 3272; EG-NEXT: LSHR T26.X, PV.W, literal.x, 3273; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3274; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 3275; EG-NEXT: LSHR T27.X, PV.W, literal.x, 3276; EG-NEXT: LSHR T0.W, T22.Y, literal.y, 3277; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 3278; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3279; EG-NEXT: 64(8.968310e-44), 0(0.000000e+00) 3280; EG-NEXT: LSHR T28.X, PS, literal.x, 3281; EG-NEXT: LSHR T0.Y, T22.W, literal.y, 3282; EG-NEXT: BFE_INT T29.Z, T25.W, 0.0, literal.y, BS:VEC_120/SCL_212 3283; EG-NEXT: LSHR T1.W, T24.Y, literal.y, 3284; EG-NEXT: LSHR * T2.W, T24.W, literal.y, 3285; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3286; EG-NEXT: BFE_INT T29.X, T25.Z, 0.0, literal.x, 3287; EG-NEXT: LSHR T1.Y, T23.Y, literal.x, 3288; EG-NEXT: BFE_INT T30.Z, T25.Y, 0.0, literal.x, BS:VEC_120/SCL_212 3289; EG-NEXT: LSHR T3.W, T23.W, literal.x, 3290; EG-NEXT: LSHR * T4.W, T25.W, literal.x, 3291; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3292; EG-NEXT: BFE_INT T30.X, T25.X, 0.0, literal.x, 3293; EG-NEXT: LSHR T2.Y, T25.Y, literal.x, 3294; EG-NEXT: BFE_INT T31.Z, T23.W, 0.0, literal.x, 3295; EG-NEXT: BFE_INT T29.W, PS, 0.0, literal.x, 3296; EG-NEXT: LSHR * T4.W, T25.Z, literal.x, 3297; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3298; EG-NEXT: BFE_INT T31.X, T23.Z, 0.0, literal.x, 3299; EG-NEXT: BFE_INT T29.Y, PS, 0.0, literal.x, 3300; EG-NEXT: BFE_INT T25.Z, T23.Y, 0.0, literal.x, 3301; EG-NEXT: BFE_INT T30.W, PV.Y, 0.0, literal.x, 3302; EG-NEXT: LSHR * T4.W, T25.X, literal.x, 3303; EG-NEXT: 
16(2.242078e-44), 0(0.000000e+00) 3304; EG-NEXT: BFE_INT T25.X, T23.X, 0.0, literal.x, 3305; EG-NEXT: BFE_INT T30.Y, PS, 0.0, literal.x, 3306; EG-NEXT: BFE_INT T32.Z, T24.W, 0.0, literal.x, 3307; EG-NEXT: BFE_INT T31.W, T3.W, 0.0, literal.x, BS:VEC_120/SCL_212 3308; EG-NEXT: LSHR * T3.W, T23.Z, literal.x, 3309; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3310; EG-NEXT: BFE_INT T32.X, T24.Z, 0.0, literal.x, 3311; EG-NEXT: BFE_INT T31.Y, PS, 0.0, literal.x, 3312; EG-NEXT: BFE_INT T23.Z, T24.Y, 0.0, literal.x, 3313; EG-NEXT: BFE_INT T25.W, T1.Y, 0.0, literal.x, BS:VEC_120/SCL_212 3314; EG-NEXT: LSHR * T3.W, T23.X, literal.x, 3315; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3316; EG-NEXT: BFE_INT T23.X, T24.X, 0.0, literal.x, 3317; EG-NEXT: BFE_INT T25.Y, PS, 0.0, literal.x, 3318; EG-NEXT: BFE_INT T33.Z, T22.W, 0.0, literal.x, 3319; EG-NEXT: BFE_INT T32.W, T2.W, 0.0, literal.x, BS:VEC_120/SCL_212 3320; EG-NEXT: LSHR * T2.W, T24.Z, literal.x, 3321; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3322; EG-NEXT: BFE_INT T33.X, T22.Z, 0.0, literal.x, 3323; EG-NEXT: BFE_INT T32.Y, PS, 0.0, literal.x, 3324; EG-NEXT: BFE_INT T24.Z, T22.Y, 0.0, literal.x, 3325; EG-NEXT: BFE_INT T23.W, T1.W, 0.0, literal.x, 3326; EG-NEXT: LSHR * T1.W, T24.X, literal.x, 3327; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3328; EG-NEXT: BFE_INT T24.X, T22.X, 0.0, literal.x, 3329; EG-NEXT: BFE_INT T23.Y, PS, 0.0, literal.x, 3330; EG-NEXT: LSHR T0.Z, T22.Z, literal.x, 3331; EG-NEXT: BFE_INT T33.W, T0.Y, 0.0, literal.x, 3332; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 3333; EG-NEXT: 16(2.242078e-44), 112(1.569454e-43) 3334; EG-NEXT: LSHR T34.X, PS, literal.x, 3335; EG-NEXT: BFE_INT T33.Y, PV.Z, 0.0, literal.y, 3336; EG-NEXT: LSHR T0.Z, T22.X, literal.y, 3337; EG-NEXT: BFE_INT T24.W, T0.W, 0.0, literal.y, 3338; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 3339; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3340; EG-NEXT: 96(1.345247e-43), 0(0.000000e+00) 3341; EG-NEXT: LSHR T22.X, PS, literal.x, 3342; 
EG-NEXT: BFE_INT * T24.Y, PV.Z, 0.0, literal.y, 3343; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3344; 3345; CM-LABEL: global_sextload_v32i16_to_v32i32: 3346; CM: ; %bb.0: 3347; CM-NEXT: ALU 0, @22, KC0[CB0:0-32], KC1[] 3348; CM-NEXT: TEX 0 @14 3349; CM-NEXT: ALU 7, @23, KC0[CB0:0-32], KC1[] 3350; CM-NEXT: TEX 2 @16 3351; CM-NEXT: ALU 76, @31, KC0[CB0:0-32], KC1[] 3352; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T33, T34.X 3353; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T19, T20.X 3354; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T32, T28.X 3355; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T23, T27.X 3356; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T31, T26.X 3357; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T22, T25.X 3358; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T30, T24.X 3359; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T29, T21.X 3360; CM-NEXT: CF_END 3361; CM-NEXT: Fetch clause starting at 14: 3362; CM-NEXT: VTX_READ_128 T20.XYZW, T19.X, 0, #1 3363; CM-NEXT: Fetch clause starting at 16: 3364; CM-NEXT: VTX_READ_128 T22.XYZW, T19.X, 48, #1 3365; CM-NEXT: VTX_READ_128 T23.XYZW, T19.X, 32, #1 3366; CM-NEXT: VTX_READ_128 T19.XYZW, T19.X, 16, #1 3367; CM-NEXT: ALU clause starting at 22: 3368; CM-NEXT: MOV * T19.X, KC0[2].Z, 3369; CM-NEXT: ALU clause starting at 23: 3370; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 3371; CM-NEXT: 96(1.345247e-43), 0(0.000000e+00) 3372; CM-NEXT: LSHR T21.X, PV.W, literal.x, 3373; CM-NEXT: LSHR T0.Y, T20.Z, literal.y, 3374; CM-NEXT: LSHR T0.Z, T20.W, literal.y, 3375; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 3376; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3377; CM-NEXT: 112(1.569454e-43), 0(0.000000e+00) 3378; CM-NEXT: ALU clause starting at 31: 3379; CM-NEXT: LSHR T24.X, T0.W, literal.x, 3380; CM-NEXT: LSHR T1.Y, T20.Y, literal.y, 3381; CM-NEXT: LSHR T1.Z, T19.Z, literal.y, 3382; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 3383; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3384; CM-NEXT: 64(8.968310e-44), 0(0.000000e+00) 3385; CM-NEXT: LSHR T25.X, PV.W, 
literal.x, 3386; CM-NEXT: LSHR T2.Y, T19.W, literal.y, 3387; CM-NEXT: LSHR T2.Z, T19.X, literal.y, 3388; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 3389; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3390; CM-NEXT: 80(1.121039e-43), 0(0.000000e+00) 3391; CM-NEXT: LSHR T26.X, PV.W, literal.x, 3392; CM-NEXT: LSHR T3.Y, T19.Y, literal.y, 3393; CM-NEXT: LSHR T3.Z, T23.Z, literal.y, 3394; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 3395; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3396; CM-NEXT: 32(4.484155e-44), 0(0.000000e+00) 3397; CM-NEXT: LSHR T27.X, PV.W, literal.x, 3398; CM-NEXT: LSHR T4.Y, T23.W, literal.y, 3399; CM-NEXT: LSHR T4.Z, T23.X, literal.y, 3400; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 3401; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3402; CM-NEXT: 48(6.726233e-44), 0(0.000000e+00) 3403; CM-NEXT: LSHR T28.X, PV.W, literal.x, 3404; CM-NEXT: LSHR T5.Y, T23.Y, literal.y, 3405; CM-NEXT: BFE_INT T29.Z, T22.Y, 0.0, literal.y, BS:VEC_120/SCL_212 3406; CM-NEXT: LSHR * T0.W, T22.Z, literal.y, 3407; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3408; CM-NEXT: BFE_INT T29.X, T22.X, 0.0, literal.x, 3409; CM-NEXT: LSHR T6.Y, T22.W, literal.x, 3410; CM-NEXT: BFE_INT T30.Z, T22.W, 0.0, literal.x, 3411; CM-NEXT: LSHR * T1.W, T22.Y, literal.x, 3412; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3413; CM-NEXT: BFE_INT T30.X, T22.Z, 0.0, literal.x, 3414; CM-NEXT: LSHR T7.Y, T22.X, literal.x, 3415; CM-NEXT: BFE_INT T22.Z, T23.Y, 0.0, literal.x, 3416; CM-NEXT: BFE_INT * T29.W, PV.W, 0.0, literal.x, 3417; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3418; CM-NEXT: BFE_INT T22.X, T23.X, 0.0, literal.x, 3419; CM-NEXT: BFE_INT T29.Y, PV.Y, 0.0, literal.x, 3420; CM-NEXT: BFE_INT T31.Z, T23.W, 0.0, literal.x, 3421; CM-NEXT: BFE_INT * T30.W, T6.Y, 0.0, literal.x, 3422; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3423; CM-NEXT: BFE_INT T31.X, T23.Z, 0.0, literal.x, 3424; CM-NEXT: BFE_INT T30.Y, T0.W, 0.0, literal.x, 3425; CM-NEXT: BFE_INT T23.Z, T19.Y, 0.0, literal.x, 3426; CM-NEXT: 
BFE_INT * T22.W, T5.Y, 0.0, literal.x, BS:VEC_120/SCL_212 3427; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3428; CM-NEXT: BFE_INT T23.X, T19.X, 0.0, literal.x, 3429; CM-NEXT: BFE_INT T22.Y, T4.Z, 0.0, literal.x, 3430; CM-NEXT: BFE_INT T32.Z, T19.W, 0.0, literal.x, 3431; CM-NEXT: BFE_INT * T31.W, T4.Y, 0.0, literal.x, 3432; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3433; CM-NEXT: BFE_INT T32.X, T19.Z, 0.0, literal.x, 3434; CM-NEXT: BFE_INT T31.Y, T3.Z, 0.0, literal.x, BS:VEC_120/SCL_212 3435; CM-NEXT: BFE_INT T19.Z, T20.Y, 0.0, literal.x, 3436; CM-NEXT: BFE_INT * T23.W, T3.Y, 0.0, literal.x, BS:VEC_120/SCL_212 3437; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3438; CM-NEXT: BFE_INT T19.X, T20.X, 0.0, literal.x, 3439; CM-NEXT: BFE_INT T23.Y, T2.Z, 0.0, literal.x, 3440; CM-NEXT: BFE_INT T33.Z, T20.W, 0.0, literal.x, 3441; CM-NEXT: BFE_INT * T32.W, T2.Y, 0.0, literal.x, 3442; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3443; CM-NEXT: BFE_INT T33.X, T20.Z, 0.0, literal.x, 3444; CM-NEXT: BFE_INT T32.Y, T1.Z, 0.0, literal.x, BS:VEC_120/SCL_212 3445; CM-NEXT: LSHR T1.Z, T20.X, literal.x, 3446; CM-NEXT: BFE_INT * T19.W, T1.Y, 0.0, literal.x, 3447; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3448; CM-NEXT: LSHR T20.X, KC0[2].Y, literal.x, 3449; CM-NEXT: BFE_INT T19.Y, PV.Z, 0.0, literal.y, 3450; CM-NEXT: ADD_INT T1.Z, KC0[2].Y, literal.y, 3451; CM-NEXT: BFE_INT * T33.W, T0.Z, 0.0, literal.y, 3452; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3453; CM-NEXT: LSHR T34.X, PV.Z, literal.x, 3454; CM-NEXT: BFE_INT * T33.Y, T0.Y, 0.0, literal.y, 3455; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3456 %load = load <32 x i16>, <32 x i16> addrspace(1)* %in 3457 %ext = sext <32 x i16> %load to <32 x i32> 3458 store <32 x i32> %ext, <32 x i32> addrspace(1)* %out 3459 ret void 3460} 3461 3462define amdgpu_kernel void @global_zextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 { 3463; GCN-NOHSA-SI-LABEL: global_zextload_v64i16_to_v64i32: 3464; GCN-NOHSA-SI: 
; %bb.0: 3465; GCN-NOHSA-SI-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 3466; GCN-NOHSA-SI-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 3467; GCN-NOHSA-SI-NEXT: s_mov_b32 s14, -1 3468; GCN-NOHSA-SI-NEXT: s_mov_b32 s15, 0xe8f000 3469; GCN-NOHSA-SI-NEXT: s_add_u32 s12, s12, s3 3470; GCN-NOHSA-SI-NEXT: s_addc_u32 s13, s13, 0 3471; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 3472; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 3473; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 3474; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2 3475; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3 3476; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 3477; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6 3478; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7 3479; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 3480; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[16:19], off, s[8:11], 0 offset:16 3481; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(1) 3482; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v3, 16, v15 3483; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v1, 16, v14 3484; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v6, 16, v13 3485; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v4, 16, v12 3486; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 3487; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v9, 16, v19 3488; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v7, 16, v18 3489; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v2, 0xffff, v15 3490; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v14 3491; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill 3492; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 3493; GCN-NOHSA-SI-NEXT: buffer_store_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Spill 3494; GCN-NOHSA-SI-NEXT: buffer_store_dword v2, off, s[12:15], 0 offset:12 ; 4-byte Folded Spill 3495; GCN-NOHSA-SI-NEXT: buffer_store_dword v3, off, s[12:15], 0 offset:16 ; 4-byte Folded Spill 3496; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v5, 0xffff, v13 3497; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3498; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v3, 0xffff, v12 3499; GCN-NOHSA-SI-NEXT: buffer_store_dword v3, off, 
s[12:15], 0 offset:20 ; 4-byte Folded Spill 3500; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 3501; GCN-NOHSA-SI-NEXT: buffer_store_dword v4, off, s[12:15], 0 offset:24 ; 4-byte Folded Spill 3502; GCN-NOHSA-SI-NEXT: buffer_store_dword v5, off, s[12:15], 0 offset:28 ; 4-byte Folded Spill 3503; GCN-NOHSA-SI-NEXT: buffer_store_dword v6, off, s[12:15], 0 offset:32 ; 4-byte Folded Spill 3504; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v15, 16, v17 3505; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v13, 16, v16 3506; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v8, 0xffff, v19 3507; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3508; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v6, 0xffff, v18 3509; GCN-NOHSA-SI-NEXT: buffer_store_dword v6, off, s[12:15], 0 offset:36 ; 4-byte Folded Spill 3510; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 3511; GCN-NOHSA-SI-NEXT: buffer_store_dword v7, off, s[12:15], 0 offset:40 ; 4-byte Folded Spill 3512; GCN-NOHSA-SI-NEXT: buffer_store_dword v8, off, s[12:15], 0 offset:44 ; 4-byte Folded Spill 3513; GCN-NOHSA-SI-NEXT: buffer_store_dword v9, off, s[12:15], 0 offset:48 ; 4-byte Folded Spill 3514; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[24:27], off, s[8:11], 0 offset:32 3515; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v14, 0xffff, v17 3516; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v12, 0xffff, v16 3517; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[28:31], off, s[8:11], 0 offset:48 3518; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(1) 3519; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v19, 16, v27 3520; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v17, 16, v26 3521; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v23, 16, v25 3522; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v21, 16, v24 3523; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v18, 0xffff, v27 3524; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v16, 0xffff, v26 3525; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v22, 0xffff, v25 3526; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v20, 0xffff, v24 3527; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 3528; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v27, 16, v31 3529; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v25, 16, v30 
3530; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v34, 16, v29 3531; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v32, 16, v28 3532; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v26, 0xffff, v31 3533; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v24, 0xffff, v30 3534; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[35:38], off, s[8:11], 0 offset:64 3535; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v33, 0xffff, v29 3536; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v31, 0xffff, v28 3537; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[39:42], off, s[8:11], 0 offset:80 3538; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(1) 3539; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v46, 16, v38 3540; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v44, 16, v37 3541; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v50, 16, v36 3542; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v48, 16, v35 3543; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v45, 0xffff, v38 3544; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v43, 0xffff, v37 3545; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v49, 0xffff, v36 3546; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v47, 0xffff, v35 3547; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 3548; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v38, 16, v42 3549; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v36, 16, v41 3550; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v54, 16, v40 3551; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v52, 16, v39 3552; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v37, 0xffff, v42 3553; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v35, 0xffff, v41 3554; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[55:58], off, s[8:11], 0 offset:96 3555; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v53, 0xffff, v40 3556; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v51, 0xffff, v39 3557; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[39:42], off, s[8:11], 0 offset:112 3558; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(1) 3559; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v61, 16, v58 3560; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v59, 16, v57 3561; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v11, 16, v56 3562; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3563; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v9, 16, v55 3564; GCN-NOHSA-SI-NEXT: v_and_b32_e32 
v60, 0xffff, v58 3565; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v58, 0xffff, v57 3566; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v10, 0xffff, v56 3567; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v8, 0xffff, v55 3568; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 3569; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v7, 16, v42 3570; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v5, 16, v41 3571; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v3, 16, v40 3572; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v1, 16, v39 3573; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v6, 0xffff, v42 3574; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v4, 0xffff, v41 3575; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v2, 0xffff, v40 3576; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v39 3577; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4 3578; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5 3579; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224 3580; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:240 3581; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:192 3582; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[58:61], off, s[0:3], 0 offset:208 3583; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[51:54], off, s[0:3], 0 offset:160 3584; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[35:38], off, s[0:3], 0 offset:176 3585; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[47:50], off, s[0:3], 0 offset:128 3586; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[43:46], off, s[0:3], 0 offset:144 3587; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[31:34], off, s[0:3], 0 offset:96 3588; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:112 3589; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:64 3590; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:80 3591; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:32 3592; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:36 ; 4-byte Folded Reload 3593; GCN-NOHSA-SI-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:40 ; 4-byte Folded 
Reload 3594; GCN-NOHSA-SI-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:44 ; 4-byte Folded Reload 3595; GCN-NOHSA-SI-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:48 ; 4-byte Folded Reload 3596; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 3597; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 3598; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3599; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:20 ; 4-byte Folded Reload 3600; GCN-NOHSA-SI-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:24 ; 4-byte Folded Reload 3601; GCN-NOHSA-SI-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:28 ; 4-byte Folded Reload 3602; GCN-NOHSA-SI-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:32 ; 4-byte Folded Reload 3603; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 3604; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 3605; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 3606; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload 3607; GCN-NOHSA-SI-NEXT: buffer_load_dword v1, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload 3608; GCN-NOHSA-SI-NEXT: buffer_load_dword v2, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload 3609; GCN-NOHSA-SI-NEXT: buffer_load_dword v3, off, s[12:15], 0 offset:16 ; 4-byte Folded Reload 3610; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 3611; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 3612; GCN-NOHSA-SI-NEXT: s_endpgm 3613; 3614; GCN-HSA-LABEL: global_zextload_v64i16_to_v64i32: 3615; GCN-HSA: ; %bb.0: 3616; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 3617; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 3618; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 3619; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 3620; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 3621; GCN-HSA-NEXT: s_add_u32 s4, s2, 0x50 3622; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 3623; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 3624; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 3625; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] 3626; 
GCN-HSA-NEXT: s_add_u32 s4, s2, 0x60 3627; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 3628; GCN-HSA-NEXT: v_mov_b32_e32 v9, s5 3629; GCN-HSA-NEXT: v_mov_b32_e32 v8, s4 3630; GCN-HSA-NEXT: s_add_u32 s4, s2, 0x70 3631; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 3632; GCN-HSA-NEXT: v_mov_b32_e32 v13, s5 3633; GCN-HSA-NEXT: v_mov_b32_e32 v12, s4 3634; GCN-HSA-NEXT: s_add_u32 s4, s2, 16 3635; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 3636; GCN-HSA-NEXT: s_add_u32 s6, s2, 32 3637; GCN-HSA-NEXT: s_addc_u32 s7, s3, 0 3638; GCN-HSA-NEXT: s_add_u32 s8, s2, 48 3639; GCN-HSA-NEXT: flat_load_dwordx4 v[8:11], v[8:9] 3640; GCN-HSA-NEXT: s_addc_u32 s9, s3, 0 3641; GCN-HSA-NEXT: s_add_u32 s2, s2, 64 3642; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0 3643; GCN-HSA-NEXT: flat_load_dwordx4 v[12:15], v[12:13] 3644; GCN-HSA-NEXT: v_mov_b32_e32 v17, s3 3645; GCN-HSA-NEXT: v_mov_b32_e32 v16, s2 3646; GCN-HSA-NEXT: flat_load_dwordx4 v[16:19], v[16:17] 3647; GCN-HSA-NEXT: v_mov_b32_e32 v21, s9 3648; GCN-HSA-NEXT: v_mov_b32_e32 v20, s8 3649; GCN-HSA-NEXT: flat_load_dwordx4 v[20:23], v[20:21] 3650; GCN-HSA-NEXT: v_mov_b32_e32 v29, s5 3651; GCN-HSA-NEXT: v_mov_b32_e32 v28, s4 3652; GCN-HSA-NEXT: flat_load_dwordx4 v[28:31], v[28:29] 3653; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 3654; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3655; GCN-HSA-NEXT: v_mov_b32_e32 v37, s1 3656; GCN-HSA-NEXT: v_mov_b32_e32 v36, s0 3657; GCN-HSA-NEXT: s_waitcnt vmcnt(6) 3658; GCN-HSA-NEXT: v_lshrrev_b32_e32 v27, 16, v1 3659; GCN-HSA-NEXT: v_lshrrev_b32_e32 v25, 16, v0 3660; GCN-HSA-NEXT: v_and_b32_e32 v26, 0xffff, v1 3661; GCN-HSA-NEXT: v_and_b32_e32 v24, 0xffff, v0 3662; GCN-HSA-NEXT: v_mov_b32_e32 v0, s6 3663; GCN-HSA-NEXT: v_mov_b32_e32 v1, s7 3664; GCN-HSA-NEXT: flat_load_dwordx4 v[32:35], v[0:1] 3665; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 3666; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 3667; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xe0 3668; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3669; GCN-HSA-NEXT: s_add_u32 s4, s0, 0xf0 3670; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 3671; 
GCN-HSA-NEXT: s_add_u32 s6, s0, 0xc0 3672; GCN-HSA-NEXT: s_addc_u32 s7, s1, 0 3673; GCN-HSA-NEXT: s_add_u32 s8, s0, 0xd0 3674; GCN-HSA-NEXT: s_addc_u32 s9, s1, 0 3675; GCN-HSA-NEXT: flat_store_dwordx4 v[36:37], v[24:27] 3676; GCN-HSA-NEXT: s_add_u32 s10, s0, 0xa0 3677; GCN-HSA-NEXT: v_lshrrev_b32_e32 v27, 16, v3 3678; GCN-HSA-NEXT: v_lshrrev_b32_e32 v25, 16, v2 3679; GCN-HSA-NEXT: v_and_b32_e32 v26, 0xffff, v3 3680; GCN-HSA-NEXT: v_and_b32_e32 v24, 0xffff, v2 3681; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[24:27] 3682; GCN-HSA-NEXT: s_addc_u32 s11, s1, 0 3683; GCN-HSA-NEXT: v_mov_b32_e32 v25, s3 3684; GCN-HSA-NEXT: s_waitcnt vmcnt(8) 3685; GCN-HSA-NEXT: v_lshrrev_b32_e32 v3, 16, v5 3686; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v4 3687; GCN-HSA-NEXT: v_and_b32_e32 v2, 0xffff, v5 3688; GCN-HSA-NEXT: v_and_b32_e32 v0, 0xffff, v4 3689; GCN-HSA-NEXT: v_mov_b32_e32 v4, s10 3690; GCN-HSA-NEXT: v_mov_b32_e32 v24, s2 3691; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xb0 3692; GCN-HSA-NEXT: v_mov_b32_e32 v5, s11 3693; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3694; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3695; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 3696; GCN-HSA-NEXT: v_lshrrev_b32_e32 v3, 16, v7 3697; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v6 3698; GCN-HSA-NEXT: v_and_b32_e32 v2, 0xffff, v7 3699; GCN-HSA-NEXT: v_and_b32_e32 v0, 0xffff, v6 3700; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 3701; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3702; GCN-HSA-NEXT: v_mov_b32_e32 v4, s6 3703; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x80 3704; GCN-HSA-NEXT: v_mov_b32_e32 v5, s7 3705; GCN-HSA-NEXT: s_waitcnt vmcnt(9) 3706; GCN-HSA-NEXT: v_lshrrev_b32_e32 v3, 16, v9 3707; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v8 3708; GCN-HSA-NEXT: v_and_b32_e32 v2, 0xffff, v9 3709; GCN-HSA-NEXT: v_and_b32_e32 v0, 0xffff, v8 3710; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3711; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3712; GCN-HSA-NEXT: v_mov_b32_e32 v4, s8 3713; GCN-HSA-NEXT: v_lshrrev_b32_e32 v3, 16, v11 3714; 
GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v10 3715; GCN-HSA-NEXT: v_and_b32_e32 v2, 0xffff, v11 3716; GCN-HSA-NEXT: v_and_b32_e32 v0, 0xffff, v10 3717; GCN-HSA-NEXT: v_mov_b32_e32 v11, s3 3718; GCN-HSA-NEXT: v_mov_b32_e32 v5, s9 3719; GCN-HSA-NEXT: v_mov_b32_e32 v10, s2 3720; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x90 3721; GCN-HSA-NEXT: v_mov_b32_e32 v9, s5 3722; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3723; GCN-HSA-NEXT: s_waitcnt vmcnt(10) 3724; GCN-HSA-NEXT: v_lshrrev_b32_e32 v5, 16, v14 3725; GCN-HSA-NEXT: v_lshrrev_b32_e32 v3, 16, v13 3726; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v12 3727; GCN-HSA-NEXT: v_and_b32_e32 v2, 0xffff, v13 3728; GCN-HSA-NEXT: v_and_b32_e32 v0, 0xffff, v12 3729; GCN-HSA-NEXT: v_and_b32_e32 v6, 0xffff, v15 3730; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3731; GCN-HSA-NEXT: v_mov_b32_e32 v8, s4 3732; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v15 3733; GCN-HSA-NEXT: v_and_b32_e32 v4, 0xffff, v14 3734; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[0:3] 3735; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[4:7] 3736; GCN-HSA-NEXT: s_waitcnt vmcnt(11) 3737; GCN-HSA-NEXT: v_lshrrev_b32_e32 v3, 16, v17 3738; GCN-HSA-NEXT: v_mov_b32_e32 v6, s3 3739; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v16 3740; GCN-HSA-NEXT: v_and_b32_e32 v2, 0xffff, v17 3741; GCN-HSA-NEXT: v_and_b32_e32 v0, 0xffff, v16 3742; GCN-HSA-NEXT: v_mov_b32_e32 v5, s2 3743; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60 3744; GCN-HSA-NEXT: v_lshrrev_b32_e32 v4, 16, v19 3745; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[0:3] 3746; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3747; GCN-HSA-NEXT: v_lshrrev_b32_e32 v2, 16, v18 3748; GCN-HSA-NEXT: v_and_b32_e32 v3, 0xffff, v19 3749; GCN-HSA-NEXT: v_and_b32_e32 v1, 0xffff, v18 3750; GCN-HSA-NEXT: flat_store_dwordx4 v[5:6], v[1:4] 3751; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 3752; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 3753; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70 3754; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3755; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 3756; GCN-HSA-NEXT: v_mov_b32_e32 v8, 
s2 3757; GCN-HSA-NEXT: s_add_u32 s2, s0, 64 3758; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3759; GCN-HSA-NEXT: v_mov_b32_e32 v17, s3 3760; GCN-HSA-NEXT: v_mov_b32_e32 v16, s2 3761; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x50 3762; GCN-HSA-NEXT: s_waitcnt vmcnt(10) 3763; GCN-HSA-NEXT: v_lshrrev_b32_e32 v15, 16, v33 3764; GCN-HSA-NEXT: v_lshrrev_b32_e32 v13, 16, v32 3765; GCN-HSA-NEXT: v_and_b32_e32 v14, 0xffff, v33 3766; GCN-HSA-NEXT: v_and_b32_e32 v12, 0xffff, v32 3767; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3768; GCN-HSA-NEXT: v_lshrrev_b32_e32 v3, 16, v21 3769; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v20 3770; GCN-HSA-NEXT: v_and_b32_e32 v2, 0xffff, v21 3771; GCN-HSA-NEXT: v_and_b32_e32 v0, 0xffff, v20 3772; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[12:15] 3773; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3774; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3 3775; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v23 3776; GCN-HSA-NEXT: v_lshrrev_b32_e32 v5, 16, v22 3777; GCN-HSA-NEXT: v_and_b32_e32 v6, 0xffff, v23 3778; GCN-HSA-NEXT: v_and_b32_e32 v4, 0xffff, v22 3779; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2 3780; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 3781; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[4:7] 3782; GCN-HSA-NEXT: v_lshrrev_b32_e32 v11, 16, v35 3783; GCN-HSA-NEXT: v_lshrrev_b32_e32 v9, 16, v34 3784; GCN-HSA-NEXT: v_and_b32_e32 v10, 0xffff, v35 3785; GCN-HSA-NEXT: v_and_b32_e32 v8, 0xffff, v34 3786; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 3787; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[8:11] 3788; GCN-HSA-NEXT: s_add_u32 s0, s0, 48 3789; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 3790; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v29 3791; GCN-HSA-NEXT: v_lshrrev_b32_e32 v5, 16, v28 3792; GCN-HSA-NEXT: v_and_b32_e32 v6, 0xffff, v29 3793; GCN-HSA-NEXT: v_and_b32_e32 v4, 0xffff, v28 3794; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2 3795; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 3796; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[4:7] 3797; GCN-HSA-NEXT: v_lshrrev_b32_e32 v3, 16, v31 3798; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 
3799; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v30 3800; GCN-HSA-NEXT: v_and_b32_e32 v2, 0xffff, v31 3801; GCN-HSA-NEXT: v_and_b32_e32 v0, 0xffff, v30 3802; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 3803; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 3804; GCN-HSA-NEXT: s_endpgm 3805; 3806; GCN-NOHSA-VI-LABEL: global_zextload_v64i16_to_v64i32: 3807; GCN-NOHSA-VI: ; %bb.0: 3808; GCN-NOHSA-VI-NEXT: s_mov_b32 s88, SCRATCH_RSRC_DWORD0 3809; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 3810; GCN-NOHSA-VI-NEXT: s_mov_b32 s89, SCRATCH_RSRC_DWORD1 3811; GCN-NOHSA-VI-NEXT: s_mov_b32 s90, -1 3812; GCN-NOHSA-VI-NEXT: s_mov_b32 s91, 0xe80000 3813; GCN-NOHSA-VI-NEXT: s_add_u32 s88, s88, s3 3814; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 3815; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 3816; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 3817; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 3818; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 3819; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 3820; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 3821; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 3822; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[16:19], off, s[8:11], 0 offset:16 3823; GCN-NOHSA-VI-NEXT: s_addc_u32 s89, s89, 0 3824; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 3825; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 3826; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1) 3827; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v15 3828; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v14 3829; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v2, 0xffff, v15 3830; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v14 3831; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[88:91], 0 offset:4 ; 4-byte Folded Spill 3832; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 3833; GCN-NOHSA-VI-NEXT: buffer_store_dword v1, off, s[88:91], 0 offset:8 ; 4-byte Folded Spill 3834; GCN-NOHSA-VI-NEXT: buffer_store_dword v2, off, s[88:91], 0 offset:12 ; 4-byte Folded Spill 3835; GCN-NOHSA-VI-NEXT: buffer_store_dword v3, off, s[88:91], 0 offset:16 ; 4-byte Folded Spill 3836; GCN-NOHSA-VI-NEXT: 
v_lshrrev_b32_e32 v6, 16, v13 3837; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v4, 16, v12 3838; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v5, 0xffff, v13 3839; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v3, 0xffff, v12 3840; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v11, 16, v19 3841; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v9, 16, v18 3842; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v15, 16, v17 3843; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v13, 16, v16 3844; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v10, 0xffff, v19 3845; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v8, 0xffff, v18 3846; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v14, 0xffff, v17 3847; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v12, 0xffff, v16 3848; GCN-NOHSA-VI-NEXT: buffer_store_dword v3, off, s[88:91], 0 offset:20 ; 4-byte Folded Spill 3849; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 3850; GCN-NOHSA-VI-NEXT: buffer_store_dword v4, off, s[88:91], 0 offset:24 ; 4-byte Folded Spill 3851; GCN-NOHSA-VI-NEXT: buffer_store_dword v5, off, s[88:91], 0 offset:28 ; 4-byte Folded Spill 3852; GCN-NOHSA-VI-NEXT: buffer_store_dword v6, off, s[88:91], 0 offset:32 ; 4-byte Folded Spill 3853; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[24:27], off, s[8:11], 0 offset:32 3854; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[28:31], off, s[8:11], 0 offset:48 3855; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[35:38], off, s[8:11], 0 offset:64 3856; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[39:42], off, s[8:11], 0 offset:80 3857; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(3) 3858; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v19, 16, v27 3859; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1) 3860; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v46, 16, v38 3861; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v44, 16, v37 3862; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v50, 16, v36 3863; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v48, 16, v35 3864; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v45, 0xffff, v38 3865; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v43, 0xffff, v37 3866; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v49, 0xffff, v36 3867; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v47, 0xffff, v35 3868; 
GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 3869; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v38, 16, v42 3870; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v36, 16, v41 3871; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v54, 16, v40 3872; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v52, 16, v39 3873; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v37, 0xffff, v42 3874; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v35, 0xffff, v41 3875; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v53, 0xffff, v40 3876; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v51, 0xffff, v39 3877; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[39:42], off, s[8:11], 0 offset:96 3878; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[55:58], off, s[8:11], 0 offset:112 3879; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v17, 16, v26 3880; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v23, 16, v25 3881; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v21, 16, v24 3882; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v18, 0xffff, v27 3883; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v16, 0xffff, v26 3884; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v22, 0xffff, v25 3885; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v20, 0xffff, v24 3886; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v27, 16, v31 3887; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v25, 16, v30 3888; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v34, 16, v29 3889; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v32, 16, v28 3890; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v26, 0xffff, v31 3891; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v24, 0xffff, v30 3892; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v33, 0xffff, v29 3893; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v31, 0xffff, v28 3894; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1) 3895; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v62, 16, v42 3896; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 3897; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v56 3898; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v55 3899; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v2, 0xffff, v56 3900; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v55 3901; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v60, 16, v41 3902; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v7, 16, v40 3903; GCN-NOHSA-VI-NEXT: 
v_lshrrev_b32_e32 v5, 16, v39 3904; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v61, 0xffff, v42 3905; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v59, 0xffff, v41 3906; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v6, 0xffff, v40 3907; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v4, 0xffff, v39 3908; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v42, 16, v58 3909; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v40, 16, v57 3910; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v41, 0xffff, v58 3911; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v39, 0xffff, v57 3912; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224 3913; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[39:42], off, s[0:3], 0 offset:240 3914; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:192 3915; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[59:62], off, s[0:3], 0 offset:208 3916; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[51:54], off, s[0:3], 0 offset:160 3917; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[35:38], off, s[0:3], 0 offset:176 3918; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[47:50], off, s[0:3], 0 offset:128 3919; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[43:46], off, s[0:3], 0 offset:144 3920; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[31:34], off, s[0:3], 0 offset:96 3921; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:112 3922; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:64 3923; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:80 3924; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:32 3925; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:48 3926; GCN-NOHSA-VI-NEXT: buffer_load_dword v0, off, s[88:91], 0 offset:20 ; 4-byte Folded Reload 3927; GCN-NOHSA-VI-NEXT: buffer_load_dword v1, off, s[88:91], 0 offset:24 ; 4-byte Folded Reload 3928; GCN-NOHSA-VI-NEXT: buffer_load_dword v2, off, s[88:91], 0 offset:28 ; 4-byte Folded Reload 3929; GCN-NOHSA-VI-NEXT: buffer_load_dword v3, off, s[88:91], 0 offset:32 ; 4-byte Folded 
Reload 3930; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 3931; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 3932; GCN-NOHSA-VI-NEXT: buffer_load_dword v0, off, s[88:91], 0 offset:4 ; 4-byte Folded Reload 3933; GCN-NOHSA-VI-NEXT: buffer_load_dword v1, off, s[88:91], 0 offset:8 ; 4-byte Folded Reload 3934; GCN-NOHSA-VI-NEXT: buffer_load_dword v2, off, s[88:91], 0 offset:12 ; 4-byte Folded Reload 3935; GCN-NOHSA-VI-NEXT: buffer_load_dword v3, off, s[88:91], 0 offset:16 ; 4-byte Folded Reload 3936; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 3937; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 3938; GCN-NOHSA-VI-NEXT: s_endpgm 3939; 3940; EG-LABEL: global_zextload_v64i16_to_v64i32: 3941; EG: ; %bb.0: 3942; EG-NEXT: ALU 0, @38, KC0[CB0:0-32], KC1[] 3943; EG-NEXT: TEX 3 @22 3944; EG-NEXT: ALU 56, @39, KC0[CB0:0-32], KC1[] 3945; EG-NEXT: TEX 3 @30 3946; EG-NEXT: ALU 87, @96, KC0[CB0:0-32], KC1[] 3947; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T49.XYZW, T66.X, 0 3948; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T63.XYZW, T65.X, 0 3949; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T50.XYZW, T64.X, 0 3950; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T60.XYZW, T62.X, 0 3951; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T51.XYZW, T61.X, 0 3952; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T57.XYZW, T59.X, 0 3953; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T52.XYZW, T58.X, 0 3954; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T54.XYZW, T56.X, 0 3955; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T38.XYZW, T55.X, 0 3956; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T53.X, 0 3957; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T48.X, 0 3958; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T47.X, 0 3959; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T46.X, 0 3960; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T44.X, 0 3961; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T36.XYZW, T43.X, 0 3962; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T41.X, 1 3963; EG-NEXT: CF_END 3964; EG-NEXT: Fetch clause starting at 22: 3965; 
EG-NEXT: VTX_READ_128 T36.XYZW, T35.X, 0, #1 3966; EG-NEXT: VTX_READ_128 T38.XYZW, T35.X, 48, #1 3967; EG-NEXT: VTX_READ_128 T39.XYZW, T35.X, 32, #1 3968; EG-NEXT: VTX_READ_128 T40.XYZW, T35.X, 16, #1 3969; EG-NEXT: Fetch clause starting at 30: 3970; EG-NEXT: VTX_READ_128 T49.XYZW, T35.X, 112, #1 3971; EG-NEXT: VTX_READ_128 T50.XYZW, T35.X, 96, #1 3972; EG-NEXT: VTX_READ_128 T51.XYZW, T35.X, 80, #1 3973; EG-NEXT: VTX_READ_128 T52.XYZW, T35.X, 64, #1 3974; EG-NEXT: ALU clause starting at 38: 3975; EG-NEXT: MOV * T35.X, KC0[2].Z, 3976; EG-NEXT: ALU clause starting at 39: 3977; EG-NEXT: LSHR * T37.W, T36.W, literal.x, 3978; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3979; EG-NEXT: AND_INT * T37.Z, T36.W, literal.x, 3980; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3981; EG-NEXT: LSHR T37.Y, T36.Z, literal.x, 3982; EG-NEXT: LSHR * T36.W, T36.Y, literal.x, 3983; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 3984; EG-NEXT: AND_INT T37.X, T36.Z, literal.x, 3985; EG-NEXT: AND_INT T36.Z, T36.Y, literal.x, 3986; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 3987; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 3988; EG-NEXT: LSHR T41.X, PV.W, literal.x, 3989; EG-NEXT: LSHR T36.Y, T36.X, literal.y, 3990; EG-NEXT: LSHR T42.W, T40.W, literal.y, 3991; EG-NEXT: AND_INT * T36.X, T36.X, literal.z, 3992; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 3993; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3994; EG-NEXT: AND_INT * T42.Z, T40.W, literal.x, 3995; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 3996; EG-NEXT: LSHR T43.X, KC0[2].Y, literal.x, 3997; EG-NEXT: LSHR T42.Y, T40.Z, literal.y, 3998; EG-NEXT: LSHR T40.W, T40.Y, literal.y, 3999; EG-NEXT: AND_INT * T42.X, T40.Z, literal.z, 4000; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4001; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4002; EG-NEXT: AND_INT T40.Z, T40.Y, literal.x, 4003; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4004; EG-NEXT: 65535(9.183409e-41), 48(6.726233e-44) 4005; EG-NEXT: LSHR T44.X, PV.W, literal.x, 4006; 
EG-NEXT: LSHR T40.Y, T40.X, literal.y, 4007; EG-NEXT: LSHR T45.W, T39.W, literal.y, 4008; EG-NEXT: AND_INT * T40.X, T40.X, literal.z, 4009; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4010; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4011; EG-NEXT: AND_INT T45.Z, T39.W, literal.x, 4012; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4013; EG-NEXT: 65535(9.183409e-41), 32(4.484155e-44) 4014; EG-NEXT: LSHR T46.X, PV.W, literal.x, 4015; EG-NEXT: LSHR T45.Y, T39.Z, literal.y, 4016; EG-NEXT: LSHR T39.W, T39.Y, literal.y, 4017; EG-NEXT: AND_INT * T45.X, T39.Z, literal.z, 4018; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4019; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4020; EG-NEXT: AND_INT T39.Z, T39.Y, literal.x, 4021; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4022; EG-NEXT: 65535(9.183409e-41), 80(1.121039e-43) 4023; EG-NEXT: LSHR T47.X, PV.W, literal.x, 4024; EG-NEXT: LSHR T39.Y, T39.X, literal.y, 4025; EG-NEXT: AND_INT * T39.X, T39.X, literal.z, 4026; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4027; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4028; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.x, 4029; EG-NEXT: LSHR * T35.W, T38.W, literal.y, 4030; EG-NEXT: 64(8.968310e-44), 16(2.242078e-44) 4031; EG-NEXT: LSHR T48.X, PV.W, literal.x, 4032; EG-NEXT: AND_INT * T35.Z, T38.W, literal.y, 4033; EG-NEXT: 2(2.802597e-45), 65535(9.183409e-41) 4034; EG-NEXT: ALU clause starting at 96: 4035; EG-NEXT: LSHR T35.Y, T38.Z, literal.x, 4036; EG-NEXT: LSHR * T38.W, T38.Y, literal.x, 4037; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4038; EG-NEXT: AND_INT T35.X, T38.Z, literal.x, 4039; EG-NEXT: AND_INT T38.Z, T38.Y, literal.x, 4040; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4041; EG-NEXT: 65535(9.183409e-41), 112(1.569454e-43) 4042; EG-NEXT: LSHR T53.X, PV.W, literal.x, 4043; EG-NEXT: LSHR T38.Y, T38.X, literal.y, 4044; EG-NEXT: LSHR T54.W, T52.W, literal.y, 4045; EG-NEXT: AND_INT * T38.X, T38.X, literal.z, 4046; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4047; EG-NEXT: 
65535(9.183409e-41), 0(0.000000e+00) 4048; EG-NEXT: AND_INT T54.Z, T52.W, literal.x, 4049; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4050; EG-NEXT: 65535(9.183409e-41), 96(1.345247e-43) 4051; EG-NEXT: LSHR T55.X, PV.W, literal.x, 4052; EG-NEXT: LSHR T54.Y, T52.Z, literal.y, 4053; EG-NEXT: LSHR T52.W, T52.Y, literal.y, 4054; EG-NEXT: AND_INT * T54.X, T52.Z, literal.z, 4055; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4056; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4057; EG-NEXT: AND_INT T52.Z, T52.Y, literal.x, 4058; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4059; EG-NEXT: 65535(9.183409e-41), 144(2.017870e-43) 4060; EG-NEXT: LSHR T56.X, PV.W, literal.x, 4061; EG-NEXT: LSHR T52.Y, T52.X, literal.y, 4062; EG-NEXT: LSHR T57.W, T51.W, literal.y, 4063; EG-NEXT: AND_INT * T52.X, T52.X, literal.z, 4064; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4065; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4066; EG-NEXT: AND_INT T57.Z, T51.W, literal.x, 4067; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4068; EG-NEXT: 65535(9.183409e-41), 128(1.793662e-43) 4069; EG-NEXT: LSHR T58.X, PV.W, literal.x, 4070; EG-NEXT: LSHR T57.Y, T51.Z, literal.y, 4071; EG-NEXT: LSHR T51.W, T51.Y, literal.y, 4072; EG-NEXT: AND_INT * T57.X, T51.Z, literal.z, 4073; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4074; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4075; EG-NEXT: AND_INT T51.Z, T51.Y, literal.x, 4076; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4077; EG-NEXT: 65535(9.183409e-41), 176(2.466285e-43) 4078; EG-NEXT: LSHR T59.X, PV.W, literal.x, 4079; EG-NEXT: LSHR T51.Y, T51.X, literal.y, 4080; EG-NEXT: LSHR T60.W, T50.W, literal.y, 4081; EG-NEXT: AND_INT * T51.X, T51.X, literal.z, 4082; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4083; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4084; EG-NEXT: AND_INT T60.Z, T50.W, literal.x, 4085; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4086; EG-NEXT: 65535(9.183409e-41), 160(2.242078e-43) 4087; EG-NEXT: LSHR T61.X, PV.W, literal.x, 4088; 
EG-NEXT: LSHR T60.Y, T50.Z, literal.y, 4089; EG-NEXT: LSHR T50.W, T50.Y, literal.y, 4090; EG-NEXT: AND_INT * T60.X, T50.Z, literal.z, 4091; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4092; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4093; EG-NEXT: AND_INT T50.Z, T50.Y, literal.x, 4094; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4095; EG-NEXT: 65535(9.183409e-41), 208(2.914701e-43) 4096; EG-NEXT: LSHR T62.X, PV.W, literal.x, 4097; EG-NEXT: LSHR T50.Y, T50.X, literal.y, 4098; EG-NEXT: LSHR T63.W, T49.W, literal.y, 4099; EG-NEXT: AND_INT * T50.X, T50.X, literal.z, 4100; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4101; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4102; EG-NEXT: AND_INT T63.Z, T49.W, literal.x, 4103; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4104; EG-NEXT: 65535(9.183409e-41), 192(2.690493e-43) 4105; EG-NEXT: LSHR T64.X, PV.W, literal.x, 4106; EG-NEXT: LSHR T63.Y, T49.Z, literal.y, 4107; EG-NEXT: LSHR T49.W, T49.Y, literal.y, 4108; EG-NEXT: AND_INT * T63.X, T49.Z, literal.z, 4109; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4110; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4111; EG-NEXT: AND_INT T49.Z, T49.Y, literal.x, 4112; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4113; EG-NEXT: 65535(9.183409e-41), 240(3.363116e-43) 4114; EG-NEXT: LSHR T65.X, PV.W, literal.x, 4115; EG-NEXT: LSHR T49.Y, T49.X, literal.y, 4116; EG-NEXT: AND_INT * T49.X, T49.X, literal.z, 4117; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4118; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4119; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 4120; EG-NEXT: 224(3.138909e-43), 0(0.000000e+00) 4121; EG-NEXT: LSHR * T66.X, PV.W, literal.x, 4122; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4123; 4124; CM-LABEL: global_zextload_v64i16_to_v64i32: 4125; CM: ; %bb.0: 4126; CM-NEXT: ALU 0, @38, KC0[CB0:0-32], KC1[] 4127; CM-NEXT: TEX 3 @22 4128; CM-NEXT: ALU 50, @39, KC0[CB0:0-32], KC1[] 4129; CM-NEXT: TEX 3 @30 4130; CM-NEXT: ALU 78, @90, KC0[CB0:0-32], KC1[] 4131; CM-NEXT: 
MEM_RAT_CACHELESS STORE_DWORD T65, T66.X 4132; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T63, T48.X 4133; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T62, T64.X 4134; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T60, T49.X 4135; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T59, T61.X 4136; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T57, T50.X 4137; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T56, T58.X 4138; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T54, T51.X 4139; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T53, T55.X 4140; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T35, T37.X 4141; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T47, T52.X 4142; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T45, T38.X 4143; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T44, T46.X 4144; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T42, T39.X 4145; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T41, T43.X 4146; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T40, T36.X 4147; CM-NEXT: CF_END 4148; CM-NEXT: Fetch clause starting at 22: 4149; CM-NEXT: VTX_READ_128 T36.XYZW, T35.X, 112, #1 4150; CM-NEXT: VTX_READ_128 T37.XYZW, T35.X, 64, #1 4151; CM-NEXT: VTX_READ_128 T38.XYZW, T35.X, 80, #1 4152; CM-NEXT: VTX_READ_128 T39.XYZW, T35.X, 96, #1 4153; CM-NEXT: Fetch clause starting at 30: 4154; CM-NEXT: VTX_READ_128 T48.XYZW, T35.X, 0, #1 4155; CM-NEXT: VTX_READ_128 T49.XYZW, T35.X, 16, #1 4156; CM-NEXT: VTX_READ_128 T50.XYZW, T35.X, 32, #1 4157; CM-NEXT: VTX_READ_128 T51.XYZW, T35.X, 48, #1 4158; CM-NEXT: ALU clause starting at 38: 4159; CM-NEXT: MOV * T35.X, KC0[2].Z, 4160; CM-NEXT: ALU clause starting at 39: 4161; CM-NEXT: LSHR * T40.W, T36.Y, literal.x, 4162; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4163; CM-NEXT: AND_INT * T40.Z, T36.Y, literal.x, 4164; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4165; CM-NEXT: LSHR T40.Y, T36.X, literal.x, 4166; CM-NEXT: LSHR * T41.W, T36.W, literal.x, 4167; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4168; CM-NEXT: AND_INT T40.X, T36.X, literal.x, 4169; CM-NEXT: AND_INT T41.Z, T36.W, literal.x, 4170; CM-NEXT: ADD_INT * T0.W, 
KC0[2].Y, literal.y, 4171; CM-NEXT: 65535(9.183409e-41), 224(3.138909e-43) 4172; CM-NEXT: LSHR T36.X, PV.W, literal.x, 4173; CM-NEXT: LSHR T41.Y, T36.Z, literal.y, 4174; CM-NEXT: LSHR * T42.W, T39.Y, literal.y, 4175; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4176; CM-NEXT: AND_INT T41.X, T36.Z, literal.x, 4177; CM-NEXT: AND_INT T42.Z, T39.Y, literal.x, 4178; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4179; CM-NEXT: 65535(9.183409e-41), 240(3.363116e-43) 4180; CM-NEXT: LSHR T43.X, PV.W, literal.x, 4181; CM-NEXT: LSHR T42.Y, T39.X, literal.y, 4182; CM-NEXT: LSHR * T44.W, T39.W, literal.y, 4183; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4184; CM-NEXT: AND_INT T42.X, T39.X, literal.x, 4185; CM-NEXT: AND_INT T44.Z, T39.W, literal.x, 4186; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4187; CM-NEXT: 65535(9.183409e-41), 192(2.690493e-43) 4188; CM-NEXT: LSHR T39.X, PV.W, literal.x, 4189; CM-NEXT: LSHR T44.Y, T39.Z, literal.y, 4190; CM-NEXT: LSHR * T45.W, T38.Y, literal.y, 4191; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4192; CM-NEXT: AND_INT T44.X, T39.Z, literal.x, 4193; CM-NEXT: AND_INT T45.Z, T38.Y, literal.x, 4194; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4195; CM-NEXT: 65535(9.183409e-41), 208(2.914701e-43) 4196; CM-NEXT: LSHR T46.X, PV.W, literal.x, 4197; CM-NEXT: LSHR T45.Y, T38.X, literal.y, 4198; CM-NEXT: LSHR * T47.W, T38.W, literal.y, 4199; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4200; CM-NEXT: AND_INT T45.X, T38.X, literal.x, 4201; CM-NEXT: AND_INT T47.Z, T38.W, literal.x, 4202; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4203; CM-NEXT: 65535(9.183409e-41), 160(2.242078e-43) 4204; CM-NEXT: LSHR T38.X, PV.W, literal.x, 4205; CM-NEXT: LSHR T47.Y, T38.Z, literal.y, 4206; CM-NEXT: LSHR * T35.W, T37.Y, literal.y, 4207; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4208; CM-NEXT: AND_INT T47.X, T38.Z, literal.x, 4209; CM-NEXT: AND_INT T35.Z, T37.Y, literal.x, 4210; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4211; CM-NEXT: 65535(9.183409e-41), 
176(2.466285e-43) 4212; CM-NEXT: ALU clause starting at 90: 4213; CM-NEXT: LSHR T52.X, T0.W, literal.x, 4214; CM-NEXT: LSHR T35.Y, T37.X, literal.y, 4215; CM-NEXT: LSHR * T53.W, T37.W, literal.y, BS:VEC_120/SCL_212 4216; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4217; CM-NEXT: AND_INT T35.X, T37.X, literal.x, 4218; CM-NEXT: AND_INT T53.Z, T37.W, literal.x, 4219; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4220; CM-NEXT: 65535(9.183409e-41), 128(1.793662e-43) 4221; CM-NEXT: LSHR T37.X, PV.W, literal.x, 4222; CM-NEXT: LSHR T53.Y, T37.Z, literal.y, 4223; CM-NEXT: LSHR * T54.W, T51.Y, literal.y, 4224; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4225; CM-NEXT: AND_INT T53.X, T37.Z, literal.x, 4226; CM-NEXT: AND_INT T54.Z, T51.Y, literal.x, 4227; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4228; CM-NEXT: 65535(9.183409e-41), 144(2.017870e-43) 4229; CM-NEXT: LSHR T55.X, PV.W, literal.x, 4230; CM-NEXT: LSHR T54.Y, T51.X, literal.y, 4231; CM-NEXT: LSHR * T56.W, T51.W, literal.y, 4232; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4233; CM-NEXT: AND_INT T54.X, T51.X, literal.x, 4234; CM-NEXT: AND_INT T56.Z, T51.W, literal.x, 4235; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4236; CM-NEXT: 65535(9.183409e-41), 96(1.345247e-43) 4237; CM-NEXT: LSHR T51.X, PV.W, literal.x, 4238; CM-NEXT: LSHR T56.Y, T51.Z, literal.y, 4239; CM-NEXT: LSHR * T57.W, T50.Y, literal.y, 4240; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4241; CM-NEXT: AND_INT T56.X, T51.Z, literal.x, 4242; CM-NEXT: AND_INT T57.Z, T50.Y, literal.x, 4243; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4244; CM-NEXT: 65535(9.183409e-41), 112(1.569454e-43) 4245; CM-NEXT: LSHR T58.X, PV.W, literal.x, 4246; CM-NEXT: LSHR T57.Y, T50.X, literal.y, 4247; CM-NEXT: LSHR * T59.W, T50.W, literal.y, 4248; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4249; CM-NEXT: AND_INT T57.X, T50.X, literal.x, 4250; CM-NEXT: AND_INT T59.Z, T50.W, literal.x, 4251; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4252; CM-NEXT: 65535(9.183409e-41), 
64(8.968310e-44) 4253; CM-NEXT: LSHR T50.X, PV.W, literal.x, 4254; CM-NEXT: LSHR T59.Y, T50.Z, literal.y, 4255; CM-NEXT: LSHR * T60.W, T49.Y, literal.y, 4256; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4257; CM-NEXT: AND_INT T59.X, T50.Z, literal.x, 4258; CM-NEXT: AND_INT T60.Z, T49.Y, literal.x, 4259; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4260; CM-NEXT: 65535(9.183409e-41), 80(1.121039e-43) 4261; CM-NEXT: LSHR T61.X, PV.W, literal.x, 4262; CM-NEXT: LSHR T60.Y, T49.X, literal.y, 4263; CM-NEXT: LSHR * T62.W, T49.W, literal.y, 4264; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4265; CM-NEXT: AND_INT T60.X, T49.X, literal.x, 4266; CM-NEXT: AND_INT T62.Z, T49.W, literal.x, 4267; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4268; CM-NEXT: 65535(9.183409e-41), 32(4.484155e-44) 4269; CM-NEXT: LSHR T49.X, PV.W, literal.x, 4270; CM-NEXT: LSHR T62.Y, T49.Z, literal.y, 4271; CM-NEXT: LSHR * T63.W, T48.Y, literal.y, 4272; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4273; CM-NEXT: AND_INT T62.X, T49.Z, literal.x, 4274; CM-NEXT: AND_INT T63.Z, T48.Y, literal.x, 4275; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4276; CM-NEXT: 65535(9.183409e-41), 48(6.726233e-44) 4277; CM-NEXT: LSHR T64.X, PV.W, literal.x, 4278; CM-NEXT: LSHR T63.Y, T48.X, literal.y, 4279; CM-NEXT: LSHR * T65.W, T48.W, literal.y, 4280; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4281; CM-NEXT: AND_INT T63.X, T48.X, literal.x, 4282; CM-NEXT: AND_INT * T65.Z, T48.W, literal.x, 4283; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 4284; CM-NEXT: LSHR T48.X, KC0[2].Y, literal.x, 4285; CM-NEXT: LSHR * T65.Y, T48.Z, literal.y, 4286; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4287; CM-NEXT: AND_INT T65.X, T48.Z, literal.x, 4288; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4289; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 4290; CM-NEXT: LSHR * T66.X, PV.W, literal.x, 4291; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4292 %load = load <64 x i16>, <64 x i16> addrspace(1)* %in 4293 %ext = zext <64 x i16> %load to 
<64 x i32> 4294 store <64 x i32> %ext, <64 x i32> addrspace(1)* %out 4295 ret void 4296} 4297 4298define amdgpu_kernel void @global_sextload_v64i16_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 { 4299; GCN-NOHSA-SI-LABEL: global_sextload_v64i16_to_v64i32: 4300; GCN-NOHSA-SI: ; %bb.0: 4301; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0 4302; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1 4303; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, -1 4304; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, 0xe8f000 4305; GCN-NOHSA-SI-NEXT: s_add_u32 s8, s8, s3 4306; GCN-NOHSA-SI-NEXT: s_addc_u32 s9, s9, 0 4307; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 4308; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 4309; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 4310; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 4311; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4 4312; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5 4313; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s6 4314; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s7 4315; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, s2 4316; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, s3 4317; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:112 4318; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:96 4319; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:80 4320; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 offset:64 4321; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 4322; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:16 4323; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[32:35], off, s[4:7], 0 offset:32 4324; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[36:39], off, s[4:7], 0 offset:48 4325; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(3) 4326; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v3, 16, v11 4327; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 16, v10 4328; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v11, 0, 16 4329; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v10, 0, 16 4330; GCN-NOHSA-SI-NEXT: buffer_store_dword v0, off, 
s[8:11], 0 offset:4 ; 4-byte Folded Spill 4331; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 4332; GCN-NOHSA-SI-NEXT: buffer_store_dword v1, off, s[8:11], 0 offset:8 ; 4-byte Folded Spill 4333; GCN-NOHSA-SI-NEXT: buffer_store_dword v2, off, s[8:11], 0 offset:12 ; 4-byte Folded Spill 4334; GCN-NOHSA-SI-NEXT: buffer_store_dword v3, off, s[8:11], 0 offset:16 ; 4-byte Folded Spill 4335; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v7, 16, v9 4336; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v5, 16, v8 4337; GCN-NOHSA-SI-NEXT: v_bfe_i32 v6, v9, 0, 16 4338; GCN-NOHSA-SI-NEXT: v_bfe_i32 v4, v8, 0, 16 4339; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v11, 16, v31 4340; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v9, 16, v30 4341; GCN-NOHSA-SI-NEXT: v_bfe_i32 v10, v31, 0, 16 4342; GCN-NOHSA-SI-NEXT: v_bfe_i32 v8, v30, 0, 16 4343; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v43, 16, v29 4344; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v41, 16, v28 4345; GCN-NOHSA-SI-NEXT: v_bfe_i32 v42, v29, 0, 16 4346; GCN-NOHSA-SI-NEXT: v_bfe_i32 v40, v28, 0, 16 4347; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v31, 16, v35 4348; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v29, 16, v34 4349; GCN-NOHSA-SI-NEXT: v_bfe_i32 v30, v35, 0, 16 4350; GCN-NOHSA-SI-NEXT: v_bfe_i32 v28, v34, 0, 16 4351; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v47, 16, v33 4352; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v45, 16, v32 4353; GCN-NOHSA-SI-NEXT: v_bfe_i32 v46, v33, 0, 16 4354; GCN-NOHSA-SI-NEXT: v_bfe_i32 v44, v32, 0, 16 4355; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v35, 16, v39 4356; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v33, 16, v38 4357; GCN-NOHSA-SI-NEXT: v_bfe_i32 v34, v39, 0, 16 4358; GCN-NOHSA-SI-NEXT: v_bfe_i32 v32, v38, 0, 16 4359; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v51, 16, v37 4360; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v49, 16, v36 4361; GCN-NOHSA-SI-NEXT: v_bfe_i32 v50, v37, 0, 16 4362; GCN-NOHSA-SI-NEXT: v_bfe_i32 v48, v36, 0, 16 4363; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v39, 16, v27 4364; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v37, 16, v26 4365; GCN-NOHSA-SI-NEXT: 
v_bfe_i32 v38, v27, 0, 16 4366; GCN-NOHSA-SI-NEXT: v_bfe_i32 v36, v26, 0, 16 4367; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v55, 16, v25 4368; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v53, 16, v24 4369; GCN-NOHSA-SI-NEXT: v_bfe_i32 v54, v25, 0, 16 4370; GCN-NOHSA-SI-NEXT: v_bfe_i32 v52, v24, 0, 16 4371; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v26, 16, v23 4372; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v24, 16, v22 4373; GCN-NOHSA-SI-NEXT: v_bfe_i32 v25, v23, 0, 16 4374; GCN-NOHSA-SI-NEXT: v_bfe_i32 v23, v22, 0, 16 4375; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v59, 16, v21 4376; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v57, 16, v20 4377; GCN-NOHSA-SI-NEXT: v_bfe_i32 v58, v21, 0, 16 4378; GCN-NOHSA-SI-NEXT: v_bfe_i32 v56, v20, 0, 16 4379; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v22, 16, v19 4380; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v20, 16, v18 4381; GCN-NOHSA-SI-NEXT: v_bfe_i32 v21, v19, 0, 16 4382; GCN-NOHSA-SI-NEXT: v_bfe_i32 v19, v18, 0, 16 4383; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v63, 16, v17 4384; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v61, 16, v16 4385; GCN-NOHSA-SI-NEXT: v_bfe_i32 v62, v17, 0, 16 4386; GCN-NOHSA-SI-NEXT: v_bfe_i32 v60, v16, 0, 16 4387; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v18, 16, v15 4388; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v16, 16, v14 4389; GCN-NOHSA-SI-NEXT: v_bfe_i32 v17, v15, 0, 16 4390; GCN-NOHSA-SI-NEXT: v_bfe_i32 v15, v14, 0, 16 4391; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 4392; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v3, 16, v13 4393; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 16, v12 4394; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v13, 0, 16 4395; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v12, 0, 16 4396; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224 4397; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:240 4398; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[60:63], off, s[0:3], 0 offset:192 4399; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[19:22], off, s[0:3], 0 offset:208 4400; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[56:59], 
off, s[0:3], 0 offset:160 4401; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:176 4402; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[52:55], off, s[0:3], 0 offset:128 4403; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[36:39], off, s[0:3], 0 offset:144 4404; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[48:51], off, s[0:3], 0 offset:96 4405; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:112 4406; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[44:47], off, s[0:3], 0 offset:64 4407; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:80 4408; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[40:43], off, s[0:3], 0 offset:32 4409; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:48 4410; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 4411; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload 4412; GCN-NOHSA-SI-NEXT: buffer_load_dword v1, off, s[8:11], 0 offset:8 ; 4-byte Folded Reload 4413; GCN-NOHSA-SI-NEXT: buffer_load_dword v2, off, s[8:11], 0 offset:12 ; 4-byte Folded Reload 4414; GCN-NOHSA-SI-NEXT: buffer_load_dword v3, off, s[8:11], 0 offset:16 ; 4-byte Folded Reload 4415; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 4416; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 4417; GCN-NOHSA-SI-NEXT: s_endpgm 4418; 4419; GCN-HSA-LABEL: global_sextload_v64i16_to_v64i32: 4420; GCN-HSA: ; %bb.0: 4421; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 4422; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 4423; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 4424; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 4425; GCN-HSA-NEXT: flat_load_dwordx4 v[12:15], v[0:1] 4426; GCN-HSA-NEXT: s_add_u32 s4, s2, 0x70 4427; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 4428; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 4429; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 4430; GCN-HSA-NEXT: flat_load_dwordx4 v[8:11], v[0:1] 4431; GCN-HSA-NEXT: s_add_u32 s4, s2, 0x60 4432; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 4433; 
GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 4434; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 4435; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[0:1] 4436; GCN-HSA-NEXT: s_add_u32 s4, s2, 0x50 4437; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 4438; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 4439; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 4440; GCN-HSA-NEXT: s_add_u32 s4, s2, 64 4441; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 4442; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 4443; GCN-HSA-NEXT: v_mov_b32_e32 v17, s5 4444; GCN-HSA-NEXT: v_mov_b32_e32 v16, s4 4445; GCN-HSA-NEXT: flat_load_dwordx4 v[16:19], v[16:17] 4446; GCN-HSA-NEXT: s_add_u32 s4, s2, 48 4447; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 4448; GCN-HSA-NEXT: s_add_u32 s6, s2, 32 4449; GCN-HSA-NEXT: s_addc_u32 s7, s3, 0 4450; GCN-HSA-NEXT: s_add_u32 s2, s2, 16 4451; GCN-HSA-NEXT: v_mov_b32_e32 v21, s5 4452; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0 4453; GCN-HSA-NEXT: v_mov_b32_e32 v20, s4 4454; GCN-HSA-NEXT: v_mov_b32_e32 v33, s7 4455; GCN-HSA-NEXT: flat_load_dwordx4 v[20:23], v[20:21] 4456; GCN-HSA-NEXT: v_mov_b32_e32 v32, s6 4457; GCN-HSA-NEXT: v_mov_b32_e32 v37, s1 4458; GCN-HSA-NEXT: v_mov_b32_e32 v36, s0 4459; GCN-HSA-NEXT: s_waitcnt vmcnt(5) 4460; GCN-HSA-NEXT: v_ashrrev_i32_e32 v27, 16, v13 4461; GCN-HSA-NEXT: v_ashrrev_i32_e32 v25, 16, v12 4462; GCN-HSA-NEXT: v_bfe_i32 v26, v13, 0, 16 4463; GCN-HSA-NEXT: v_bfe_i32 v24, v12, 0, 16 4464; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3 4465; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2 4466; GCN-HSA-NEXT: flat_load_dwordx4 v[28:31], v[12:13] 4467; GCN-HSA-NEXT: flat_load_dwordx4 v[32:35], v[32:33] 4468; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 4469; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4470; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3 4471; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2 4472; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xe0 4473; GCN-HSA-NEXT: flat_store_dwordx4 v[36:37], v[24:27] 4474; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4475; GCN-HSA-NEXT: v_ashrrev_i32_e32 v27, 16, v15 4476; GCN-HSA-NEXT: v_ashrrev_i32_e32 v25, 16, v14 4477; GCN-HSA-NEXT: v_bfe_i32 
v26, v15, 0, 16 4478; GCN-HSA-NEXT: v_bfe_i32 v24, v14, 0, 16 4479; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[24:27] 4480; GCN-HSA-NEXT: s_waitcnt vmcnt(8) 4481; GCN-HSA-NEXT: v_ashrrev_i32_e32 v15, 16, v9 4482; GCN-HSA-NEXT: v_ashrrev_i32_e32 v13, 16, v8 4483; GCN-HSA-NEXT: v_bfe_i32 v14, v9, 0, 16 4484; GCN-HSA-NEXT: v_bfe_i32 v12, v8, 0, 16 4485; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 4486; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2 4487; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xf0 4488; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4489; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[12:15] 4490; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 4491; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2 4492; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xc0 4493; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4494; GCN-HSA-NEXT: v_mov_b32_e32 v25, s3 4495; GCN-HSA-NEXT: v_mov_b32_e32 v24, s2 4496; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xd0 4497; GCN-HSA-NEXT: v_ashrrev_i32_e32 v14, 16, v11 4498; GCN-HSA-NEXT: v_ashrrev_i32_e32 v12, 16, v10 4499; GCN-HSA-NEXT: v_bfe_i32 v13, v11, 0, 16 4500; GCN-HSA-NEXT: v_bfe_i32 v11, v10, 0, 16 4501; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4502; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[11:14] 4503; GCN-HSA-NEXT: s_waitcnt vmcnt(9) 4504; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 16, v4 4505; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3 4506; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2 4507; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xa0 4508; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4509; GCN-HSA-NEXT: v_mov_b32_e32 v15, s3 4510; GCN-HSA-NEXT: v_mov_b32_e32 v14, s2 4511; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xb0 4512; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 16, v5 4513; GCN-HSA-NEXT: v_bfe_i32 v10, v5, 0, 16 4514; GCN-HSA-NEXT: v_bfe_i32 v8, v4, 0, 16 4515; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4516; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[8:11] 4517; GCN-HSA-NEXT: v_mov_b32_e32 v25, s3 4518; GCN-HSA-NEXT: v_ashrrev_i32_e32 v10, 16, v7 4519; GCN-HSA-NEXT: v_ashrrev_i32_e32 v8, 16, v6 4520; GCN-HSA-NEXT: v_bfe_i32 v9, v7, 0, 16 4521; GCN-HSA-NEXT: v_bfe_i32 v7, v6, 
0, 16 4522; GCN-HSA-NEXT: v_mov_b32_e32 v24, s2 4523; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x80 4524; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[7:10] 4525; GCN-HSA-NEXT: s_waitcnt vmcnt(10) 4526; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 16, v0 4527; GCN-HSA-NEXT: v_ashrrev_i32_e32 v7, 16, v1 4528; GCN-HSA-NEXT: v_bfe_i32 v6, v1, 0, 16 4529; GCN-HSA-NEXT: v_bfe_i32 v4, v0, 0, 16 4530; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4531; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 16, v3 4532; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 16, v2 4533; GCN-HSA-NEXT: v_bfe_i32 v10, v3, 0, 16 4534; GCN-HSA-NEXT: v_bfe_i32 v8, v2, 0, 16 4535; GCN-HSA-NEXT: flat_store_dwordx4 v[14:15], v[4:7] 4536; GCN-HSA-NEXT: flat_store_dwordx4 v[24:25], v[8:11] 4537; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 4538; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 4539; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x90 4540; GCN-HSA-NEXT: s_waitcnt vmcnt(11) 4541; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 16, v17 4542; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v16 4543; GCN-HSA-NEXT: v_bfe_i32 v2, v17, 0, 16 4544; GCN-HSA-NEXT: v_bfe_i32 v0, v16, 0, 16 4545; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4546; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4547; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 4548; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 4549; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60 4550; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 16, v19 4551; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v18 4552; GCN-HSA-NEXT: v_bfe_i32 v2, v19, 0, 16 4553; GCN-HSA-NEXT: v_bfe_i32 v0, v18, 0, 16 4554; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4555; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4556; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 4557; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 4558; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70 4559; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4560; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3 4561; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2 4562; GCN-HSA-NEXT: s_add_u32 s2, s0, 64 4563; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4564; GCN-HSA-NEXT: v_mov_b32_e32 v17, s3 4565; GCN-HSA-NEXT: s_waitcnt vmcnt(12) 4566; 
GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 16, v23 4567; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 16, v22 4568; GCN-HSA-NEXT: v_bfe_i32 v10, v23, 0, 16 4569; GCN-HSA-NEXT: v_bfe_i32 v8, v22, 0, 16 4570; GCN-HSA-NEXT: v_mov_b32_e32 v16, s2 4571; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x50 4572; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[8:11] 4573; GCN-HSA-NEXT: s_waitcnt vmcnt(11) 4574; GCN-HSA-NEXT: v_ashrrev_i32_e32 v15, 16, v33 4575; GCN-HSA-NEXT: v_ashrrev_i32_e32 v13, 16, v32 4576; GCN-HSA-NEXT: v_bfe_i32 v14, v33, 0, 16 4577; GCN-HSA-NEXT: v_bfe_i32 v12, v32, 0, 16 4578; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4579; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[12:15] 4580; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 16, v35 4581; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3 4582; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2 4583; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 4584; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 16, v34 4585; GCN-HSA-NEXT: v_bfe_i32 v10, v35, 0, 16 4586; GCN-HSA-NEXT: v_bfe_i32 v8, v34, 0, 16 4587; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 4588; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 16, v21 4589; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v20 4590; GCN-HSA-NEXT: v_bfe_i32 v2, v21, 0, 16 4591; GCN-HSA-NEXT: v_bfe_i32 v0, v20, 0, 16 4592; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[8:11] 4593; GCN-HSA-NEXT: s_add_u32 s0, s0, 48 4594; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 4595; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4596; GCN-HSA-NEXT: v_ashrrev_i32_e32 v7, 16, v29 4597; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 16, v28 4598; GCN-HSA-NEXT: v_bfe_i32 v6, v29, 0, 16 4599; GCN-HSA-NEXT: v_bfe_i32 v4, v28, 0, 16 4600; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2 4601; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 4602; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[4:7] 4603; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 16, v31 4604; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 4605; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 16, v30 4606; GCN-HSA-NEXT: v_bfe_i32 v2, v31, 0, 16 4607; GCN-HSA-NEXT: v_bfe_i32 v0, v30, 0, 16 4608; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 
4609; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 4610; GCN-HSA-NEXT: s_endpgm 4611; 4612; GCN-NOHSA-VI-LABEL: global_sextload_v64i16_to_v64i32: 4613; GCN-NOHSA-VI: ; %bb.0: 4614; GCN-NOHSA-VI-NEXT: s_mov_b32 s88, SCRATCH_RSRC_DWORD0 4615; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 4616; GCN-NOHSA-VI-NEXT: s_mov_b32 s89, SCRATCH_RSRC_DWORD1 4617; GCN-NOHSA-VI-NEXT: s_mov_b32 s90, -1 4618; GCN-NOHSA-VI-NEXT: s_mov_b32 s91, 0xe80000 4619; GCN-NOHSA-VI-NEXT: s_add_u32 s88, s88, s3 4620; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 4621; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 4622; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 4623; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 4624; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 4625; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s6 4626; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s7 4627; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, s2 4628; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, s3 4629; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[8:11], off, s[4:7], 0 offset:112 4630; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[4:7], 0 offset:96 4631; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[16:19], off, s[4:7], 0 offset:80 4632; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[20:23], off, s[4:7], 0 offset:64 4633; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[24:27], off, s[4:7], 0 4634; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[28:31], off, s[4:7], 0 offset:16 4635; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[32:35], off, s[4:7], 0 offset:32 4636; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[36:39], off, s[4:7], 0 offset:48 4637; GCN-NOHSA-VI-NEXT: s_addc_u32 s89, s89, 0 4638; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(6) 4639; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v63, 16, v13 4640; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(5) 4641; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v59, 16, v17 4642; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(4) 4643; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v55, 16, v21 4644; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(3) 4645; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 16, v27 4646; GCN-NOHSA-VI-NEXT: 
v_ashrrev_i32_e32 v1, 16, v26 4647; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v27, 0, 16 4648; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v26, 0, 16 4649; GCN-NOHSA-VI-NEXT: buffer_store_dword v0, off, s[88:91], 0 offset:4 ; 4-byte Folded Spill 4650; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 4651; GCN-NOHSA-VI-NEXT: buffer_store_dword v1, off, s[88:91], 0 offset:8 ; 4-byte Folded Spill 4652; GCN-NOHSA-VI-NEXT: buffer_store_dword v2, off, s[88:91], 0 offset:12 ; 4-byte Folded Spill 4653; GCN-NOHSA-VI-NEXT: buffer_store_dword v3, off, s[88:91], 0 offset:16 ; 4-byte Folded Spill 4654; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 16, v9 4655; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 16, v8 4656; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v9, 0, 16 4657; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v8, 0, 16 4658; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 16, v25 4659; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v5, 16, v24 4660; GCN-NOHSA-VI-NEXT: v_bfe_i32 v6, v25, 0, 16 4661; GCN-NOHSA-VI-NEXT: v_bfe_i32 v4, v24, 0, 16 4662; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v27, 16, v31 4663; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v25, 16, v30 4664; GCN-NOHSA-VI-NEXT: v_bfe_i32 v26, v31, 0, 16 4665; GCN-NOHSA-VI-NEXT: v_bfe_i32 v24, v30, 0, 16 4666; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v43, 16, v29 4667; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v41, 16, v28 4668; GCN-NOHSA-VI-NEXT: v_bfe_i32 v42, v29, 0, 16 4669; GCN-NOHSA-VI-NEXT: v_bfe_i32 v40, v28, 0, 16 4670; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v31, 16, v35 4671; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v29, 16, v34 4672; GCN-NOHSA-VI-NEXT: v_bfe_i32 v30, v35, 0, 16 4673; GCN-NOHSA-VI-NEXT: v_bfe_i32 v28, v34, 0, 16 4674; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v47, 16, v33 4675; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v45, 16, v32 4676; GCN-NOHSA-VI-NEXT: v_bfe_i32 v46, v33, 0, 16 4677; GCN-NOHSA-VI-NEXT: v_bfe_i32 v44, v32, 0, 16 4678; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v35, 16, v39 4679; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v33, 16, v38 4680; GCN-NOHSA-VI-NEXT: v_bfe_i32 v34, v39, 0, 16 
4681; GCN-NOHSA-VI-NEXT: v_bfe_i32 v32, v38, 0, 16 4682; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v51, 16, v37 4683; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v49, 16, v36 4684; GCN-NOHSA-VI-NEXT: v_bfe_i32 v50, v37, 0, 16 4685; GCN-NOHSA-VI-NEXT: v_bfe_i32 v48, v36, 0, 16 4686; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v39, 16, v23 4687; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v37, 16, v22 4688; GCN-NOHSA-VI-NEXT: v_bfe_i32 v38, v23, 0, 16 4689; GCN-NOHSA-VI-NEXT: v_bfe_i32 v36, v22, 0, 16 4690; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v53, 16, v20 4691; GCN-NOHSA-VI-NEXT: v_bfe_i32 v54, v21, 0, 16 4692; GCN-NOHSA-VI-NEXT: v_bfe_i32 v52, v20, 0, 16 4693; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v22, 16, v19 4694; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v20, 16, v18 4695; GCN-NOHSA-VI-NEXT: v_bfe_i32 v21, v19, 0, 16 4696; GCN-NOHSA-VI-NEXT: v_bfe_i32 v19, v18, 0, 16 4697; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v57, 16, v16 4698; GCN-NOHSA-VI-NEXT: v_bfe_i32 v58, v17, 0, 16 4699; GCN-NOHSA-VI-NEXT: v_bfe_i32 v56, v16, 0, 16 4700; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v18, 16, v15 4701; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v16, 16, v14 4702; GCN-NOHSA-VI-NEXT: v_bfe_i32 v17, v15, 0, 16 4703; GCN-NOHSA-VI-NEXT: v_bfe_i32 v15, v14, 0, 16 4704; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v61, 16, v12 4705; GCN-NOHSA-VI-NEXT: v_bfe_i32 v62, v13, 0, 16 4706; GCN-NOHSA-VI-NEXT: v_bfe_i32 v60, v12, 0, 16 4707; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v14, 16, v11 4708; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v12, 16, v10 4709; GCN-NOHSA-VI-NEXT: v_bfe_i32 v13, v11, 0, 16 4710; GCN-NOHSA-VI-NEXT: v_bfe_i32 v11, v10, 0, 16 4711; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:224 4712; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:240 4713; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[60:63], off, s[0:3], 0 offset:192 4714; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:208 4715; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[56:59], off, s[0:3], 0 
offset:160 4716; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[19:22], off, s[0:3], 0 offset:176 4717; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[52:55], off, s[0:3], 0 offset:128 4718; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[36:39], off, s[0:3], 0 offset:144 4719; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[48:51], off, s[0:3], 0 offset:96 4720; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[32:35], off, s[0:3], 0 offset:112 4721; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[44:47], off, s[0:3], 0 offset:64 4722; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[28:31], off, s[0:3], 0 offset:80 4723; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[40:43], off, s[0:3], 0 offset:32 4724; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:48 4725; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 4726; GCN-NOHSA-VI-NEXT: buffer_load_dword v0, off, s[88:91], 0 offset:4 ; 4-byte Folded Reload 4727; GCN-NOHSA-VI-NEXT: buffer_load_dword v1, off, s[88:91], 0 offset:8 ; 4-byte Folded Reload 4728; GCN-NOHSA-VI-NEXT: buffer_load_dword v2, off, s[88:91], 0 offset:12 ; 4-byte Folded Reload 4729; GCN-NOHSA-VI-NEXT: buffer_load_dword v3, off, s[88:91], 0 offset:16 ; 4-byte Folded Reload 4730; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 4731; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 4732; GCN-NOHSA-VI-NEXT: s_endpgm 4733; 4734; EG-LABEL: global_sextload_v64i16_to_v64i32: 4735; EG: ; %bb.0: 4736; EG-NEXT: ALU 18, @38, KC0[CB0:0-32], KC1[] 4737; EG-NEXT: TEX 7 @22 4738; EG-NEXT: ALU 75, @57, KC0[CB0:0-32], KC1[] 4739; EG-NEXT: ALU 71, @133, KC0[CB0:0-32], KC1[] 4740; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T48.XYZW, T41.X, 0 4741; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T65.XYZW, T66.X, 0 4742; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T56.X, 0 4743; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T64.XYZW, T55.X, 0 4744; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T54.X, 0 4745; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T63.XYZW, T53.X, 0 4746; EG-NEXT: 
MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T52.X, 0 4747; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T62.XYZW, T51.X, 0 4748; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T50.X, 0 4749; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T61.XYZW, T49.X, 0 4750; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T40.X, 0 4751; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T60.XYZW, T39.X, 0 4752; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T38.X, 0 4753; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T59.XYZW, T37.X, 0 4754; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T58.XYZW, T36.X, 0 4755; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T57.XYZW, T35.X, 1 4756; EG-NEXT: CF_END 4757; EG-NEXT: PAD 4758; EG-NEXT: Fetch clause starting at 22: 4759; EG-NEXT: VTX_READ_128 T42.XYZW, T41.X, 16, #1 4760; EG-NEXT: VTX_READ_128 T43.XYZW, T41.X, 32, #1 4761; EG-NEXT: VTX_READ_128 T44.XYZW, T41.X, 0, #1 4762; EG-NEXT: VTX_READ_128 T45.XYZW, T41.X, 48, #1 4763; EG-NEXT: VTX_READ_128 T46.XYZW, T41.X, 64, #1 4764; EG-NEXT: VTX_READ_128 T47.XYZW, T41.X, 80, #1 4765; EG-NEXT: VTX_READ_128 T48.XYZW, T41.X, 96, #1 4766; EG-NEXT: VTX_READ_128 T41.XYZW, T41.X, 112, #1 4767; EG-NEXT: ALU clause starting at 38: 4768; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 4769; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4770; EG-NEXT: LSHR T35.X, PV.W, literal.x, 4771; EG-NEXT: LSHR * T36.X, KC0[2].Y, literal.x, 4772; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4773; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 4774; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 4775; EG-NEXT: LSHR T37.X, PV.W, literal.x, 4776; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4777; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 4778; EG-NEXT: LSHR T38.X, PV.W, literal.x, 4779; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4780; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 4781; EG-NEXT: LSHR T39.X, PV.W, literal.x, 4782; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4783; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 4784; EG-NEXT: LSHR T40.X, PV.W, literal.x, 4785; EG-NEXT: MOV * T41.X, 
KC0[2].Z, 4786; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 4787; EG-NEXT: ALU clause starting at 57: 4788; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 4789; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00) 4790; EG-NEXT: LSHR T49.X, PV.W, literal.x, 4791; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4792; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 4793; EG-NEXT: LSHR T50.X, PV.W, literal.x, 4794; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4795; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43) 4796; EG-NEXT: LSHR T51.X, PV.W, literal.x, 4797; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4798; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43) 4799; EG-NEXT: LSHR T52.X, PV.W, literal.x, 4800; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4801; EG-NEXT: 2(2.802597e-45), 176(2.466285e-43) 4802; EG-NEXT: LSHR T53.X, PV.W, literal.x, 4803; EG-NEXT: LSHR T0.Y, T41.Y, literal.y, 4804; EG-NEXT: LSHR T0.Z, T41.W, literal.y, 4805; EG-NEXT: LSHR T0.W, T48.Y, literal.y, BS:VEC_120/SCL_212 4806; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 4807; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4808; EG-NEXT: 160(2.242078e-43), 0(0.000000e+00) 4809; EG-NEXT: LSHR T54.X, PS, literal.x, 4810; EG-NEXT: LSHR T1.Y, T48.W, literal.y, 4811; EG-NEXT: LSHR T1.Z, T47.Y, literal.y, 4812; EG-NEXT: LSHR T1.W, T47.W, literal.y, BS:VEC_120/SCL_212 4813; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.z, 4814; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4815; EG-NEXT: 208(2.914701e-43), 0(0.000000e+00) 4816; EG-NEXT: LSHR T55.X, PS, literal.x, 4817; EG-NEXT: LSHR T2.Y, T46.Y, literal.y, 4818; EG-NEXT: LSHR T2.Z, T46.W, literal.y, 4819; EG-NEXT: LSHR T2.W, T45.Y, literal.y, BS:VEC_120/SCL_212 4820; EG-NEXT: ADD_INT * T3.W, KC0[2].Y, literal.z, 4821; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4822; EG-NEXT: 192(2.690493e-43), 0(0.000000e+00) 4823; EG-NEXT: LSHR T56.X, PS, literal.x, 4824; EG-NEXT: LSHR T3.Y, T45.W, literal.y, 4825; EG-NEXT: BFE_INT T57.Z, T44.W, 0.0, literal.y, BS:VEC_120/SCL_212 4826; EG-NEXT: LSHR 
T3.W, T43.Y, literal.y, 4827; EG-NEXT: LSHR * T4.W, T43.W, literal.y, 4828; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4829; EG-NEXT: BFE_INT T57.X, T44.Z, 0.0, literal.x, 4830; EG-NEXT: LSHR T4.Y, T42.Y, literal.x, 4831; EG-NEXT: BFE_INT T58.Z, T44.Y, 0.0, literal.x, BS:VEC_120/SCL_212 4832; EG-NEXT: LSHR T5.W, T42.W, literal.x, 4833; EG-NEXT: LSHR * T6.W, T44.W, literal.x, 4834; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4835; EG-NEXT: BFE_INT T58.X, T44.X, 0.0, literal.x, 4836; EG-NEXT: LSHR T5.Y, T44.Y, literal.x, 4837; EG-NEXT: BFE_INT T59.Z, T42.W, 0.0, literal.x, 4838; EG-NEXT: BFE_INT T57.W, PS, 0.0, literal.x, 4839; EG-NEXT: LSHR * T6.W, T44.Z, literal.x, 4840; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4841; EG-NEXT: BFE_INT T59.X, T42.Z, 0.0, literal.x, 4842; EG-NEXT: BFE_INT T57.Y, PS, 0.0, literal.x, 4843; EG-NEXT: BFE_INT T44.Z, T42.Y, 0.0, literal.x, 4844; EG-NEXT: BFE_INT T58.W, PV.Y, 0.0, literal.x, 4845; EG-NEXT: LSHR * T6.W, T44.X, literal.x, 4846; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4847; EG-NEXT: BFE_INT T44.X, T42.X, 0.0, literal.x, 4848; EG-NEXT: BFE_INT T58.Y, PS, 0.0, literal.x, 4849; EG-NEXT: BFE_INT T60.Z, T43.W, 0.0, literal.x, 4850; EG-NEXT: BFE_INT T59.W, T5.W, 0.0, literal.x, BS:VEC_120/SCL_212 4851; EG-NEXT: LSHR * T5.W, T42.Z, literal.x, 4852; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4853; EG-NEXT: BFE_INT T60.X, T43.Z, 0.0, literal.x, 4854; EG-NEXT: BFE_INT T59.Y, PS, 0.0, literal.x, 4855; EG-NEXT: BFE_INT T42.Z, T43.Y, 0.0, literal.x, 4856; EG-NEXT: BFE_INT T44.W, T4.Y, 0.0, literal.x, BS:VEC_120/SCL_212 4857; EG-NEXT: LSHR * T5.W, T42.X, literal.x, 4858; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4859; EG-NEXT: BFE_INT T42.X, T43.X, 0.0, literal.x, 4860; EG-NEXT: BFE_INT T44.Y, PS, 0.0, literal.x, 4861; EG-NEXT: BFE_INT T61.Z, T45.W, 0.0, literal.x, 4862; EG-NEXT: BFE_INT * T60.W, T4.W, 0.0, literal.x, BS:VEC_120/SCL_212 4863; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4864; EG-NEXT: ALU clause starting at 133: 4865; 
EG-NEXT: LSHR * T4.W, T43.Z, literal.x, 4866; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4867; EG-NEXT: BFE_INT T61.X, T45.Z, 0.0, literal.x, 4868; EG-NEXT: BFE_INT T60.Y, PV.W, 0.0, literal.x, 4869; EG-NEXT: BFE_INT T43.Z, T45.Y, 0.0, literal.x, 4870; EG-NEXT: BFE_INT T42.W, T3.W, 0.0, literal.x, 4871; EG-NEXT: LSHR * T3.W, T43.X, literal.x, 4872; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4873; EG-NEXT: BFE_INT T43.X, T45.X, 0.0, literal.x, 4874; EG-NEXT: BFE_INT T42.Y, PS, 0.0, literal.x, 4875; EG-NEXT: BFE_INT T62.Z, T46.W, 0.0, literal.x, 4876; EG-NEXT: BFE_INT T61.W, T3.Y, 0.0, literal.x, 4877; EG-NEXT: LSHR * T3.W, T45.Z, literal.x, 4878; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4879; EG-NEXT: BFE_INT T62.X, T46.Z, 0.0, literal.x, 4880; EG-NEXT: BFE_INT T61.Y, PS, 0.0, literal.x, 4881; EG-NEXT: BFE_INT T45.Z, T46.Y, 0.0, literal.x, 4882; EG-NEXT: BFE_INT T43.W, T2.W, 0.0, literal.x, 4883; EG-NEXT: LSHR * T2.W, T45.X, literal.x, 4884; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4885; EG-NEXT: BFE_INT T45.X, T46.X, 0.0, literal.x, 4886; EG-NEXT: BFE_INT T43.Y, PS, 0.0, literal.x, 4887; EG-NEXT: BFE_INT T63.Z, T47.W, 0.0, literal.x, 4888; EG-NEXT: BFE_INT T62.W, T2.Z, 0.0, literal.x, 4889; EG-NEXT: LSHR * T2.W, T46.Z, literal.x, 4890; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4891; EG-NEXT: BFE_INT T63.X, T47.Z, 0.0, literal.x, 4892; EG-NEXT: BFE_INT T62.Y, PS, 0.0, literal.x, 4893; EG-NEXT: BFE_INT T46.Z, T47.Y, 0.0, literal.x, 4894; EG-NEXT: BFE_INT T45.W, T2.Y, 0.0, literal.x, BS:VEC_120/SCL_212 4895; EG-NEXT: LSHR * T2.W, T46.X, literal.x, 4896; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4897; EG-NEXT: BFE_INT T46.X, T47.X, 0.0, literal.x, 4898; EG-NEXT: BFE_INT T45.Y, PS, 0.0, literal.x, 4899; EG-NEXT: BFE_INT T64.Z, T48.W, 0.0, literal.x, 4900; EG-NEXT: BFE_INT T63.W, T1.W, 0.0, literal.x, BS:VEC_120/SCL_212 4901; EG-NEXT: LSHR * T1.W, T47.Z, literal.x, 4902; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4903; EG-NEXT: BFE_INT T64.X, T48.Z, 0.0, literal.x, 
4904; EG-NEXT: BFE_INT T63.Y, PS, 0.0, literal.x, 4905; EG-NEXT: BFE_INT T47.Z, T48.Y, 0.0, literal.x, 4906; EG-NEXT: BFE_INT T46.W, T1.Z, 0.0, literal.x, BS:VEC_120/SCL_212 4907; EG-NEXT: LSHR * T1.W, T47.X, literal.x, 4908; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4909; EG-NEXT: BFE_INT T47.X, T48.X, 0.0, literal.x, 4910; EG-NEXT: BFE_INT T46.Y, PS, 0.0, literal.x, 4911; EG-NEXT: BFE_INT T65.Z, T41.W, 0.0, literal.x, 4912; EG-NEXT: BFE_INT T64.W, T1.Y, 0.0, literal.x, 4913; EG-NEXT: LSHR * T1.W, T48.Z, literal.x, 4914; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4915; EG-NEXT: BFE_INT T65.X, T41.Z, 0.0, literal.x, 4916; EG-NEXT: BFE_INT T64.Y, PS, 0.0, literal.x, 4917; EG-NEXT: BFE_INT T48.Z, T41.Y, 0.0, literal.x, 4918; EG-NEXT: BFE_INT T47.W, T0.W, 0.0, literal.x, 4919; EG-NEXT: LSHR * T0.W, T48.X, literal.x, 4920; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 4921; EG-NEXT: BFE_INT T48.X, T41.X, 0.0, literal.x, 4922; EG-NEXT: BFE_INT T47.Y, PS, 0.0, literal.x, 4923; EG-NEXT: LSHR T1.Z, T41.Z, literal.x, 4924; EG-NEXT: BFE_INT T65.W, T0.Z, 0.0, literal.x, BS:VEC_120/SCL_212 4925; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4926; EG-NEXT: 16(2.242078e-44), 240(3.363116e-43) 4927; EG-NEXT: LSHR T66.X, PS, literal.x, 4928; EG-NEXT: BFE_INT T65.Y, PV.Z, 0.0, literal.y, 4929; EG-NEXT: LSHR T0.Z, T41.X, literal.y, 4930; EG-NEXT: BFE_INT T48.W, T0.Y, 0.0, literal.y, 4931; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 4932; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4933; EG-NEXT: 224(3.138909e-43), 0(0.000000e+00) 4934; EG-NEXT: LSHR T41.X, PS, literal.x, 4935; EG-NEXT: BFE_INT * T48.Y, PV.Z, 0.0, literal.y, 4936; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4937; 4938; CM-LABEL: global_sextload_v64i16_to_v64i32: 4939; CM: ; %bb.0: 4940; CM-NEXT: ALU 0, @40, KC0[CB0:0-32], KC1[] 4941; CM-NEXT: TEX 1 @24 4942; CM-NEXT: ALU 15, @41, KC0[CB0:0-32], KC1[] 4943; CM-NEXT: TEX 5 @28 4944; CM-NEXT: ALU 82, @57, KC0[CB0:0-32], KC1[] 4945; CM-NEXT: ALU 72, @140, KC0[CB0:0-32], 
KC1[] 4946; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T65, T66.X 4947; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T35, T36.X 4948; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T64, T56.X 4949; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T37, T55.X 4950; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T63, T54.X 4951; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T45, T53.X 4952; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T62, T52.X 4953; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T44, T51.X 4954; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T61, T50.X 4955; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T43, T49.X 4956; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T60, T48.X 4957; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T42, T47.X 4958; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T59, T46.X 4959; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T41, T40.X 4960; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T58, T39.X 4961; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T57, T38.X 4962; CM-NEXT: CF_END 4963; CM-NEXT: PAD 4964; CM-NEXT: Fetch clause starting at 24: 4965; CM-NEXT: VTX_READ_128 T35.XYZW, T37.X, 16, #1 4966; CM-NEXT: VTX_READ_128 T36.XYZW, T37.X, 0, #1 4967; CM-NEXT: Fetch clause starting at 28: 4968; CM-NEXT: VTX_READ_128 T41.XYZW, T37.X, 112, #1 4969; CM-NEXT: VTX_READ_128 T42.XYZW, T37.X, 96, #1 4970; CM-NEXT: VTX_READ_128 T43.XYZW, T37.X, 80, #1 4971; CM-NEXT: VTX_READ_128 T44.XYZW, T37.X, 64, #1 4972; CM-NEXT: VTX_READ_128 T45.XYZW, T37.X, 48, #1 4973; CM-NEXT: VTX_READ_128 T37.XYZW, T37.X, 32, #1 4974; CM-NEXT: ALU clause starting at 40: 4975; CM-NEXT: MOV * T37.X, KC0[2].Z, 4976; CM-NEXT: ALU clause starting at 41: 4977; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 4978; CM-NEXT: 224(3.138909e-43), 0(0.000000e+00) 4979; CM-NEXT: LSHR T38.X, PV.W, literal.x, 4980; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 4981; CM-NEXT: 2(2.802597e-45), 240(3.363116e-43) 4982; CM-NEXT: LSHR T39.X, PV.W, literal.x, 4983; CM-NEXT: LSHR T0.Y, T36.Z, literal.y, 4984; CM-NEXT: LSHR T0.Z, T36.W, literal.y, 4985; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, 
literal.z, 4986; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4987; CM-NEXT: 192(2.690493e-43), 0(0.000000e+00) 4988; CM-NEXT: LSHR T40.X, PV.W, literal.x, 4989; CM-NEXT: LSHR T1.Y, T36.Y, literal.y, 4990; CM-NEXT: LSHR T1.Z, T35.Z, literal.y, 4991; CM-NEXT: LSHR * T0.W, T35.W, literal.y, 4992; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 4993; CM-NEXT: ALU clause starting at 57: 4994; CM-NEXT: LSHR T2.Z, T35.X, literal.x, 4995; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.y, 4996; CM-NEXT: 16(2.242078e-44), 208(2.914701e-43) 4997; CM-NEXT: LSHR T46.X, PV.W, literal.x, 4998; CM-NEXT: LSHR T2.Y, T35.Y, literal.y, 4999; CM-NEXT: LSHR T3.Z, T37.Z, literal.y, 5000; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 5001; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5002; CM-NEXT: 160(2.242078e-43), 0(0.000000e+00) 5003; CM-NEXT: LSHR T47.X, PV.W, literal.x, 5004; CM-NEXT: LSHR T3.Y, T37.W, literal.y, 5005; CM-NEXT: LSHR T4.Z, T37.X, literal.y, 5006; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 5007; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5008; CM-NEXT: 176(2.466285e-43), 0(0.000000e+00) 5009; CM-NEXT: LSHR T48.X, PV.W, literal.x, 5010; CM-NEXT: LSHR T4.Y, T37.Y, literal.y, 5011; CM-NEXT: LSHR T5.Z, T45.Z, literal.y, 5012; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 5013; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5014; CM-NEXT: 128(1.793662e-43), 0(0.000000e+00) 5015; CM-NEXT: LSHR T49.X, PV.W, literal.x, 5016; CM-NEXT: LSHR T5.Y, T45.W, literal.y, 5017; CM-NEXT: LSHR T6.Z, T45.X, literal.y, 5018; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 5019; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5020; CM-NEXT: 144(2.017870e-43), 0(0.000000e+00) 5021; CM-NEXT: LSHR T50.X, PV.W, literal.x, 5022; CM-NEXT: LSHR T6.Y, T45.Y, literal.y, 5023; CM-NEXT: LSHR T7.Z, T44.Z, literal.y, 5024; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 5025; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5026; CM-NEXT: 96(1.345247e-43), 0(0.000000e+00) 5027; CM-NEXT: LSHR T51.X, PV.W, literal.x, 5028; CM-NEXT: 
LSHR T7.Y, T44.W, literal.y, 5029; CM-NEXT: LSHR T8.Z, T44.X, literal.y, 5030; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 5031; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5032; CM-NEXT: 112(1.569454e-43), 0(0.000000e+00) 5033; CM-NEXT: LSHR T52.X, PV.W, literal.x, 5034; CM-NEXT: LSHR T8.Y, T44.Y, literal.y, 5035; CM-NEXT: LSHR T9.Z, T43.Z, literal.y, 5036; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 5037; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5038; CM-NEXT: 64(8.968310e-44), 0(0.000000e+00) 5039; CM-NEXT: LSHR T53.X, PV.W, literal.x, 5040; CM-NEXT: LSHR T9.Y, T43.W, literal.y, 5041; CM-NEXT: LSHR T10.Z, T43.X, literal.y, 5042; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 5043; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5044; CM-NEXT: 80(1.121039e-43), 0(0.000000e+00) 5045; CM-NEXT: LSHR T54.X, PV.W, literal.x, 5046; CM-NEXT: LSHR T10.Y, T43.Y, literal.y, 5047; CM-NEXT: LSHR T11.Z, T42.Z, literal.y, 5048; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 5049; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5050; CM-NEXT: 32(4.484155e-44), 0(0.000000e+00) 5051; CM-NEXT: LSHR T55.X, PV.W, literal.x, 5052; CM-NEXT: LSHR T11.Y, T42.W, literal.y, 5053; CM-NEXT: LSHR T12.Z, T42.X, literal.y, 5054; CM-NEXT: ADD_INT * T1.W, KC0[2].Y, literal.z, 5055; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5056; CM-NEXT: 48(6.726233e-44), 0(0.000000e+00) 5057; CM-NEXT: LSHR T56.X, PV.W, literal.x, 5058; CM-NEXT: LSHR T12.Y, T42.Y, literal.y, 5059; CM-NEXT: BFE_INT T57.Z, T41.Y, 0.0, literal.y, BS:VEC_120/SCL_212 5060; CM-NEXT: LSHR * T1.W, T41.Z, literal.y, 5061; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5062; CM-NEXT: BFE_INT T57.X, T41.X, 0.0, literal.x, 5063; CM-NEXT: LSHR T13.Y, T41.W, literal.x, 5064; CM-NEXT: BFE_INT T58.Z, T41.W, 0.0, literal.x, 5065; CM-NEXT: LSHR * T2.W, T41.Y, literal.x, 5066; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5067; CM-NEXT: BFE_INT T58.X, T41.Z, 0.0, literal.x, 5068; CM-NEXT: LSHR T14.Y, T41.X, literal.x, 5069; CM-NEXT: BFE_INT T41.Z, T42.Y, 0.0, 
literal.x, 5070; CM-NEXT: BFE_INT * T57.W, PV.W, 0.0, literal.x, 5071; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5072; CM-NEXT: BFE_INT T41.X, T42.X, 0.0, literal.x, 5073; CM-NEXT: BFE_INT T57.Y, PV.Y, 0.0, literal.x, 5074; CM-NEXT: BFE_INT T59.Z, T42.W, 0.0, literal.x, 5075; CM-NEXT: BFE_INT * T58.W, T13.Y, 0.0, literal.x, 5076; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5077; CM-NEXT: ALU clause starting at 140: 5078; CM-NEXT: BFE_INT T59.X, T42.Z, 0.0, literal.x, 5079; CM-NEXT: BFE_INT T58.Y, T1.W, 0.0, literal.x, 5080; CM-NEXT: BFE_INT T42.Z, T43.Y, 0.0, literal.x, 5081; CM-NEXT: BFE_INT * T41.W, T12.Y, 0.0, literal.x, BS:VEC_120/SCL_212 5082; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5083; CM-NEXT: BFE_INT T42.X, T43.X, 0.0, literal.x, 5084; CM-NEXT: BFE_INT T41.Y, T12.Z, 0.0, literal.x, 5085; CM-NEXT: BFE_INT T60.Z, T43.W, 0.0, literal.x, 5086; CM-NEXT: BFE_INT * T59.W, T11.Y, 0.0, literal.x, 5087; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5088; CM-NEXT: BFE_INT T60.X, T43.Z, 0.0, literal.x, 5089; CM-NEXT: BFE_INT T59.Y, T11.Z, 0.0, literal.x, BS:VEC_120/SCL_212 5090; CM-NEXT: BFE_INT T43.Z, T44.Y, 0.0, literal.x, 5091; CM-NEXT: BFE_INT * T42.W, T10.Y, 0.0, literal.x, BS:VEC_120/SCL_212 5092; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5093; CM-NEXT: BFE_INT T43.X, T44.X, 0.0, literal.x, 5094; CM-NEXT: BFE_INT T42.Y, T10.Z, 0.0, literal.x, 5095; CM-NEXT: BFE_INT T61.Z, T44.W, 0.0, literal.x, 5096; CM-NEXT: BFE_INT * T60.W, T9.Y, 0.0, literal.x, 5097; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5098; CM-NEXT: BFE_INT T61.X, T44.Z, 0.0, literal.x, 5099; CM-NEXT: BFE_INT T60.Y, T9.Z, 0.0, literal.x, BS:VEC_120/SCL_212 5100; CM-NEXT: BFE_INT T44.Z, T45.Y, 0.0, literal.x, 5101; CM-NEXT: BFE_INT * T43.W, T8.Y, 0.0, literal.x, BS:VEC_120/SCL_212 5102; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5103; CM-NEXT: BFE_INT T44.X, T45.X, 0.0, literal.x, 5104; CM-NEXT: BFE_INT T43.Y, T8.Z, 0.0, literal.x, 5105; CM-NEXT: BFE_INT T62.Z, T45.W, 0.0, literal.x, 5106; CM-NEXT: 
BFE_INT * T61.W, T7.Y, 0.0, literal.x, 5107; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5108; CM-NEXT: BFE_INT T62.X, T45.Z, 0.0, literal.x, 5109; CM-NEXT: BFE_INT T61.Y, T7.Z, 0.0, literal.x, BS:VEC_120/SCL_212 5110; CM-NEXT: BFE_INT T45.Z, T37.Y, 0.0, literal.x, 5111; CM-NEXT: BFE_INT * T44.W, T6.Y, 0.0, literal.x, BS:VEC_120/SCL_212 5112; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5113; CM-NEXT: BFE_INT T45.X, T37.X, 0.0, literal.x, 5114; CM-NEXT: BFE_INT T44.Y, T6.Z, 0.0, literal.x, 5115; CM-NEXT: BFE_INT T63.Z, T37.W, 0.0, literal.x, 5116; CM-NEXT: BFE_INT * T62.W, T5.Y, 0.0, literal.x, 5117; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5118; CM-NEXT: BFE_INT T63.X, T37.Z, 0.0, literal.x, 5119; CM-NEXT: BFE_INT T62.Y, T5.Z, 0.0, literal.x, BS:VEC_120/SCL_212 5120; CM-NEXT: BFE_INT T37.Z, T35.Y, 0.0, literal.x, 5121; CM-NEXT: BFE_INT * T45.W, T4.Y, 0.0, literal.x, BS:VEC_120/SCL_212 5122; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5123; CM-NEXT: BFE_INT T37.X, T35.X, 0.0, literal.x, 5124; CM-NEXT: BFE_INT T45.Y, T4.Z, 0.0, literal.x, 5125; CM-NEXT: BFE_INT T64.Z, T35.W, 0.0, literal.x, 5126; CM-NEXT: BFE_INT * T63.W, T3.Y, 0.0, literal.x, 5127; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5128; CM-NEXT: BFE_INT T64.X, T35.Z, 0.0, literal.x, 5129; CM-NEXT: BFE_INT T63.Y, T3.Z, 0.0, literal.x, BS:VEC_120/SCL_212 5130; CM-NEXT: BFE_INT T35.Z, T36.Y, 0.0, literal.x, 5131; CM-NEXT: BFE_INT * T37.W, T2.Y, 0.0, literal.x, BS:VEC_120/SCL_212 5132; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5133; CM-NEXT: BFE_INT T35.X, T36.X, 0.0, literal.x, 5134; CM-NEXT: BFE_INT T37.Y, T2.Z, 0.0, literal.x, 5135; CM-NEXT: BFE_INT T65.Z, T36.W, 0.0, literal.x, 5136; CM-NEXT: BFE_INT * T64.W, T0.W, 0.0, literal.x, BS:VEC_120/SCL_212 5137; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5138; CM-NEXT: BFE_INT T65.X, T36.Z, 0.0, literal.x, 5139; CM-NEXT: BFE_INT T64.Y, T1.Z, 0.0, literal.x, BS:VEC_120/SCL_212 5140; CM-NEXT: LSHR T1.Z, T36.X, literal.x, 5141; CM-NEXT: BFE_INT * T35.W, T1.Y, 0.0, 
literal.x, 5142; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5143; CM-NEXT: LSHR T36.X, KC0[2].Y, literal.x, 5144; CM-NEXT: BFE_INT T35.Y, PV.Z, 0.0, literal.y, 5145; CM-NEXT: ADD_INT T1.Z, KC0[2].Y, literal.y, 5146; CM-NEXT: BFE_INT * T65.W, T0.Z, 0.0, literal.y, 5147; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5148; CM-NEXT: LSHR T66.X, PV.Z, literal.x, 5149; CM-NEXT: BFE_INT * T65.Y, T0.Y, 0.0, literal.y, 5150; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5151 %load = load <64 x i16>, <64 x i16> addrspace(1)* %in 5152 %ext = sext <64 x i16> %load to <64 x i32> 5153 store <64 x i32> %ext, <64 x i32> addrspace(1)* %out 5154 ret void 5155} 5156 5157define amdgpu_kernel void @global_zextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) #0 { 5158; GCN-NOHSA-SI-LABEL: global_zextload_i16_to_i64: 5159; GCN-NOHSA-SI: ; %bb.0: 5160; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 5161; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 5162; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 5163; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 5164; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 5165; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5166; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 5167; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 5168; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 5169; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 5170; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 5171; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0 5172; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 5173; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 5174; GCN-NOHSA-SI-NEXT: s_endpgm 5175; 5176; GCN-HSA-LABEL: global_zextload_i16_to_i64: 5177; GCN-HSA: ; %bb.0: 5178; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5179; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5180; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 5181; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 5182; GCN-HSA-NEXT: flat_load_ushort v0, v[0:1] 5183; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0 5184; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1 5185; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 5186; GCN-HSA-NEXT: 
s_waitcnt vmcnt(0) 5187; GCN-HSA-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 5188; GCN-HSA-NEXT: s_endpgm 5189; 5190; GCN-NOHSA-VI-LABEL: global_zextload_i16_to_i64: 5191; GCN-NOHSA-VI: ; %bb.0: 5192; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 5193; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 5194; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 5195; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 5196; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 5197; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5198; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 5199; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 5200; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 5201; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 5202; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 5203; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0 5204; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 5205; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v0 5206; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 5207; GCN-NOHSA-VI-NEXT: s_endpgm 5208; 5209; EG-LABEL: global_zextload_i16_to_i64: 5210; EG: ; %bb.0: 5211; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5212; EG-NEXT: TEX 0 @6 5213; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] 5214; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 5215; EG-NEXT: CF_END 5216; EG-NEXT: PAD 5217; EG-NEXT: Fetch clause starting at 6: 5218; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 5219; EG-NEXT: ALU clause starting at 8: 5220; EG-NEXT: MOV * T0.X, KC0[2].Z, 5221; EG-NEXT: ALU clause starting at 9: 5222; EG-NEXT: MOV * T0.Y, 0.0, 5223; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 5224; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5225; 5226; CM-LABEL: global_zextload_i16_to_i64: 5227; CM: ; %bb.0: 5228; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5229; CM-NEXT: TEX 0 @6 5230; CM-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] 5231; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X 5232; CM-NEXT: CF_END 5233; CM-NEXT: PAD 5234; CM-NEXT: Fetch clause starting at 6: 5235; CM-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 5236; CM-NEXT: ALU clause starting at 8: 5237; CM-NEXT: 
MOV * T0.X, KC0[2].Z, 5238; CM-NEXT: ALU clause starting at 9: 5239; CM-NEXT: MOV * T0.Y, 0.0, 5240; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 5241; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5242 %a = load i16, i16 addrspace(1)* %in 5243 %ext = zext i16 %a to i64 5244 store i64 %ext, i64 addrspace(1)* %out 5245 ret void 5246} 5247 5248; FIXME: Need to optimize this sequence to avoid extra bfe: 5249; t28: i32,ch = load<LD2[%in(addrspace=1)], anyext from i16> t12, t27, undef:i64 5250; t31: i64 = any_extend t28 5251; t33: i64 = sign_extend_inreg t31, ValueType:ch:i16 5252 5253; TODO: These could be expanded earlier using ASHR 15 5254define amdgpu_kernel void @global_sextload_i16_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in) #0 { 5255; GCN-NOHSA-SI-LABEL: global_sextload_i16_to_i64: 5256; GCN-NOHSA-SI: ; %bb.0: 5257; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 5258; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 5259; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 5260; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 5261; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 5262; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5263; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 5264; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 5265; GCN-NOHSA-SI-NEXT: buffer_load_sshort v0, off, s[8:11], 0 5266; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 5267; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 5268; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 5269; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5270; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 5271; GCN-NOHSA-SI-NEXT: s_endpgm 5272; 5273; GCN-HSA-LABEL: global_sextload_i16_to_i64: 5274; GCN-HSA: ; %bb.0: 5275; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5276; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5277; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 5278; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 5279; GCN-HSA-NEXT: flat_load_sshort v0, v[0:1] 5280; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0 5281; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1 5282; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 5283; GCN-HSA-NEXT: 
v_ashrrev_i32_e32 v1, 31, v0 5284; GCN-HSA-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 5285; GCN-HSA-NEXT: s_endpgm 5286; 5287; GCN-NOHSA-VI-LABEL: global_sextload_i16_to_i64: 5288; GCN-NOHSA-VI: ; %bb.0: 5289; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 5290; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 5291; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 5292; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 5293; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 5294; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5295; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 5296; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 5297; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 5298; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 5299; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 5300; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 5301; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v0, 0, 16 5302; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5303; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 5304; GCN-NOHSA-VI-NEXT: s_endpgm 5305; 5306; EG-LABEL: global_sextload_i16_to_i64: 5307; EG: ; %bb.0: 5308; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5309; EG-NEXT: TEX 0 @6 5310; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[] 5311; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 5312; EG-NEXT: CF_END 5313; EG-NEXT: PAD 5314; EG-NEXT: Fetch clause starting at 6: 5315; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 5316; EG-NEXT: ALU clause starting at 8: 5317; EG-NEXT: MOV * T0.X, KC0[2].Z, 5318; EG-NEXT: ALU clause starting at 9: 5319; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x, 5320; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 5321; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 5322; EG-NEXT: ASHR * T0.Y, PV.X, literal.x, 5323; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 5324; 5325; CM-LABEL: global_sextload_i16_to_i64: 5326; CM: ; %bb.0: 5327; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5328; CM-NEXT: TEX 0 @6 5329; CM-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[] 5330; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X 5331; CM-NEXT: CF_END 5332; CM-NEXT: PAD 5333; CM-NEXT: 
Fetch clause starting at 6: 5334; CM-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 5335; CM-NEXT: ALU clause starting at 8: 5336; CM-NEXT: MOV * T0.X, KC0[2].Z, 5337; CM-NEXT: ALU clause starting at 9: 5338; CM-NEXT: BFE_INT * T0.X, T0.X, 0.0, literal.x, 5339; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5340; CM-NEXT: LSHR T1.X, KC0[2].Y, literal.x, 5341; CM-NEXT: ASHR * T0.Y, PV.X, literal.y, 5342; CM-NEXT: 2(2.802597e-45), 31(4.344025e-44) 5343 %a = load i16, i16 addrspace(1)* %in 5344 %ext = sext i16 %a to i64 5345 store i64 %ext, i64 addrspace(1)* %out 5346 ret void 5347} 5348 5349define amdgpu_kernel void @global_zextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 { 5350; GCN-NOHSA-SI-LABEL: global_zextload_v1i16_to_v1i64: 5351; GCN-NOHSA-SI: ; %bb.0: 5352; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 5353; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 5354; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 5355; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 5356; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 5357; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5358; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 5359; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 5360; GCN-NOHSA-SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 5361; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 5362; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 5363; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0 5364; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 5365; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 5366; GCN-NOHSA-SI-NEXT: s_endpgm 5367; 5368; GCN-HSA-LABEL: global_zextload_v1i16_to_v1i64: 5369; GCN-HSA: ; %bb.0: 5370; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5371; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5372; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 5373; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 5374; GCN-HSA-NEXT: flat_load_ushort v0, v[0:1] 5375; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0 5376; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1 5377; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 5378; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 5379; GCN-HSA-NEXT: 
flat_store_dwordx2 v[2:3], v[0:1] 5380; GCN-HSA-NEXT: s_endpgm 5381; 5382; GCN-NOHSA-VI-LABEL: global_zextload_v1i16_to_v1i64: 5383; GCN-NOHSA-VI: ; %bb.0: 5384; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 5385; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 5386; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 5387; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 5388; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 5389; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5390; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 5391; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 5392; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 5393; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 5394; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 5395; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0 5396; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 5397; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v0 5398; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 5399; GCN-NOHSA-VI-NEXT: s_endpgm 5400; 5401; EG-LABEL: global_zextload_v1i16_to_v1i64: 5402; EG: ; %bb.0: 5403; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5404; EG-NEXT: TEX 0 @6 5405; EG-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] 5406; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 5407; EG-NEXT: CF_END 5408; EG-NEXT: PAD 5409; EG-NEXT: Fetch clause starting at 6: 5410; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 5411; EG-NEXT: ALU clause starting at 8: 5412; EG-NEXT: MOV * T0.X, KC0[2].Z, 5413; EG-NEXT: ALU clause starting at 9: 5414; EG-NEXT: MOV * T0.Y, 0.0, 5415; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 5416; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5417; 5418; CM-LABEL: global_zextload_v1i16_to_v1i64: 5419; CM: ; %bb.0: 5420; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5421; CM-NEXT: TEX 0 @6 5422; CM-NEXT: ALU 2, @9, KC0[CB0:0-32], KC1[] 5423; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X 5424; CM-NEXT: CF_END 5425; CM-NEXT: PAD 5426; CM-NEXT: Fetch clause starting at 6: 5427; CM-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 5428; CM-NEXT: ALU clause starting at 8: 5429; CM-NEXT: MOV * T0.X, KC0[2].Z, 5430; 
CM-NEXT: ALU clause starting at 9: 5431; CM-NEXT: MOV * T0.Y, 0.0, 5432; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 5433; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5434 %load = load <1 x i16>, <1 x i16> addrspace(1)* %in 5435 %ext = zext <1 x i16> %load to <1 x i64> 5436 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out 5437 ret void 5438} 5439 5440; TODO: These could be expanded earlier using ASHR 15 5441define amdgpu_kernel void @global_sextload_v1i16_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i16> addrspace(1)* %in) #0 { 5442; GCN-NOHSA-SI-LABEL: global_sextload_v1i16_to_v1i64: 5443; GCN-NOHSA-SI: ; %bb.0: 5444; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 5445; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 5446; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 5447; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 5448; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 5449; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5450; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 5451; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 5452; GCN-NOHSA-SI-NEXT: buffer_load_sshort v0, off, s[8:11], 0 5453; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 5454; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 5455; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 5456; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5457; GCN-NOHSA-SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 5458; GCN-NOHSA-SI-NEXT: s_endpgm 5459; 5460; GCN-HSA-LABEL: global_sextload_v1i16_to_v1i64: 5461; GCN-HSA: ; %bb.0: 5462; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5463; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5464; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 5465; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 5466; GCN-HSA-NEXT: flat_load_sshort v0, v[0:1] 5467; GCN-HSA-NEXT: v_mov_b32_e32 v2, s0 5468; GCN-HSA-NEXT: v_mov_b32_e32 v3, s1 5469; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 5470; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5471; GCN-HSA-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 5472; GCN-HSA-NEXT: s_endpgm 5473; 5474; GCN-NOHSA-VI-LABEL: global_sextload_v1i16_to_v1i64: 5475; GCN-NOHSA-VI: ; %bb.0: 5476; 
GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 5477; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 5478; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 5479; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 5480; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 5481; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5482; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 5483; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 5484; GCN-NOHSA-VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 5485; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 5486; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 5487; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 5488; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v0, 0, 16 5489; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5490; GCN-NOHSA-VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 5491; GCN-NOHSA-VI-NEXT: s_endpgm 5492; 5493; EG-LABEL: global_sextload_v1i16_to_v1i64: 5494; EG: ; %bb.0: 5495; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5496; EG-NEXT: TEX 0 @6 5497; EG-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[] 5498; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 5499; EG-NEXT: CF_END 5500; EG-NEXT: PAD 5501; EG-NEXT: Fetch clause starting at 6: 5502; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 5503; EG-NEXT: ALU clause starting at 8: 5504; EG-NEXT: MOV * T0.X, KC0[2].Z, 5505; EG-NEXT: ALU clause starting at 9: 5506; EG-NEXT: BFE_INT T0.X, T0.X, 0.0, literal.x, 5507; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 5508; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 5509; EG-NEXT: ASHR * T0.Y, PV.X, literal.x, 5510; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 5511; 5512; CM-LABEL: global_sextload_v1i16_to_v1i64: 5513; CM: ; %bb.0: 5514; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5515; CM-NEXT: TEX 0 @6 5516; CM-NEXT: ALU 4, @9, KC0[CB0:0-32], KC1[] 5517; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X 5518; CM-NEXT: CF_END 5519; CM-NEXT: PAD 5520; CM-NEXT: Fetch clause starting at 6: 5521; CM-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 5522; CM-NEXT: ALU clause starting at 8: 5523; CM-NEXT: MOV * T0.X, KC0[2].Z, 5524; CM-NEXT: ALU clause starting at 9: 5525; 
CM-NEXT: BFE_INT * T0.X, T0.X, 0.0, literal.x, 5526; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5527; CM-NEXT: LSHR T1.X, KC0[2].Y, literal.x, 5528; CM-NEXT: ASHR * T0.Y, PV.X, literal.y, 5529; CM-NEXT: 2(2.802597e-45), 31(4.344025e-44) 5530 %load = load <1 x i16>, <1 x i16> addrspace(1)* %in 5531 %ext = sext <1 x i16> %load to <1 x i64> 5532 store <1 x i64> %ext, <1 x i64> addrspace(1)* %out 5533 ret void 5534} 5535 5536define amdgpu_kernel void @global_zextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { 5537; GCN-NOHSA-SI-LABEL: global_zextload_v2i16_to_v2i64: 5538; GCN-NOHSA-SI: ; %bb.0: 5539; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 5540; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 5541; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 5542; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 5543; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 5544; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5545; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 5546; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 5547; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[8:11], 0 5548; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0 5549; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 5550; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 5551; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 5552; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v2, 16, v0 5553; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v0 5554; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v1 5555; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 5556; GCN-NOHSA-SI-NEXT: s_endpgm 5557; 5558; GCN-HSA-LABEL: global_zextload_v2i16_to_v2i64: 5559; GCN-HSA: ; %bb.0: 5560; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5561; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5562; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 5563; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 5564; GCN-HSA-NEXT: flat_load_dword v0, v[0:1] 5565; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 5566; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 5567; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 5568; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1 5569; GCN-HSA-NEXT: s_waitcnt 
vmcnt(0) 5570; GCN-HSA-NEXT: v_lshrrev_b32_e32 v2, 16, v0 5571; GCN-HSA-NEXT: v_and_b32_e32 v0, 0xffff, v0 5572; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5573; GCN-HSA-NEXT: s_endpgm 5574; 5575; GCN-NOHSA-VI-LABEL: global_zextload_v2i16_to_v2i64: 5576; GCN-NOHSA-VI: ; %bb.0: 5577; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 5578; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 5579; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 5580; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 5581; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 5582; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5583; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 5584; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 5585; GCN-NOHSA-VI-NEXT: buffer_load_dword v0, off, s[8:11], 0 5586; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0 5587; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 5588; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 5589; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, v1 5590; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 5591; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v2, 16, v0 5592; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v0 5593; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 5594; GCN-NOHSA-VI-NEXT: s_endpgm 5595; 5596; EG-LABEL: global_zextload_v2i16_to_v2i64: 5597; EG: ; %bb.0: 5598; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5599; EG-NEXT: TEX 0 @6 5600; EG-NEXT: ALU 6, @9, KC0[CB0:0-32], KC1[] 5601; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T5.X, 1 5602; EG-NEXT: CF_END 5603; EG-NEXT: PAD 5604; EG-NEXT: Fetch clause starting at 6: 5605; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 5606; EG-NEXT: ALU clause starting at 8: 5607; EG-NEXT: MOV * T4.X, KC0[2].Z, 5608; EG-NEXT: ALU clause starting at 9: 5609; EG-NEXT: LSHR * T4.Z, T4.X, literal.x, 5610; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5611; EG-NEXT: AND_INT T4.X, T4.X, literal.x, 5612; EG-NEXT: MOV T4.Y, 0.0, 5613; EG-NEXT: MOV T4.W, 0.0, 5614; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y, 5615; EG-NEXT: 65535(9.183409e-41), 2(2.802597e-45) 5616; 5617; CM-LABEL: global_zextload_v2i16_to_v2i64: 
5618; CM: ; %bb.0: 5619; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5620; CM-NEXT: TEX 0 @6 5621; CM-NEXT: ALU 7, @9, KC0[CB0:0-32], KC1[] 5622; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T4, T5.X 5623; CM-NEXT: CF_END 5624; CM-NEXT: PAD 5625; CM-NEXT: Fetch clause starting at 6: 5626; CM-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 5627; CM-NEXT: ALU clause starting at 8: 5628; CM-NEXT: MOV * T4.X, KC0[2].Z, 5629; CM-NEXT: ALU clause starting at 9: 5630; CM-NEXT: LSHR * T4.Z, T4.X, literal.x, 5631; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5632; CM-NEXT: AND_INT T4.X, T4.X, literal.x, 5633; CM-NEXT: MOV T4.Y, 0.0, 5634; CM-NEXT: MOV * T4.W, 0.0, 5635; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 5636; CM-NEXT: LSHR * T5.X, KC0[2].Y, literal.x, 5637; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5638 %load = load <2 x i16>, <2 x i16> addrspace(1)* %in 5639 %ext = zext <2 x i16> %load to <2 x i64> 5640 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out 5641 ret void 5642} 5643 5644define amdgpu_kernel void @global_sextload_v2i16_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) #0 { 5645; GCN-NOHSA-SI-LABEL: global_sextload_v2i16_to_v2i64: 5646; GCN-NOHSA-SI: ; %bb.0: 5647; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 5648; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 5649; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 5650; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 5651; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 5652; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5653; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 5654; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 5655; GCN-NOHSA-SI-NEXT: buffer_load_dword v0, off, s[8:11], 0 5656; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 5657; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 5658; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 5659; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v2, 16, v0 5660; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v0, 0, 16 5661; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5662; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v2, 0, 16 5663; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v3, 31, 
v2 5664; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 5665; GCN-NOHSA-SI-NEXT: s_endpgm 5666; 5667; GCN-HSA-LABEL: global_sextload_v2i16_to_v2i64: 5668; GCN-HSA: ; %bb.0: 5669; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5670; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5671; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 5672; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 5673; GCN-HSA-NEXT: flat_load_dword v0, v[0:1] 5674; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 5675; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 5676; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 5677; GCN-HSA-NEXT: v_lshrrev_b32_e32 v2, 16, v0 5678; GCN-HSA-NEXT: v_bfe_i32 v0, v0, 0, 16 5679; GCN-HSA-NEXT: v_bfe_i32 v2, v2, 0, 16 5680; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5681; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 31, v2 5682; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 5683; GCN-HSA-NEXT: s_endpgm 5684; 5685; GCN-NOHSA-VI-LABEL: global_sextload_v2i16_to_v2i64: 5686; GCN-NOHSA-VI: ; %bb.0: 5687; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 5688; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 5689; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 5690; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 5691; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 5692; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5693; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 5694; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 5695; GCN-NOHSA-VI-NEXT: buffer_load_dword v1, off, s[8:11], 0 5696; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 5697; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 5698; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 5699; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v2, 16, v1 5700; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v1, 0, 16 5701; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v2, 0, 16 5702; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5703; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 31, v2 5704; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 5705; GCN-NOHSA-VI-NEXT: s_endpgm 5706; 5707; EG-LABEL: global_sextload_v2i16_to_v2i64: 5708; EG: ; %bb.0: 5709; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5710; EG-NEXT: 
TEX 0 @6 5711; EG-NEXT: ALU 8, @9, KC0[CB0:0-32], KC1[] 5712; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T5.X, 1 5713; EG-NEXT: CF_END 5714; EG-NEXT: PAD 5715; EG-NEXT: Fetch clause starting at 6: 5716; EG-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 5717; EG-NEXT: ALU clause starting at 8: 5718; EG-NEXT: MOV * T4.X, KC0[2].Z, 5719; EG-NEXT: ALU clause starting at 9: 5720; EG-NEXT: ASHR * T4.W, T4.X, literal.x, 5721; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 5722; EG-NEXT: ASHR * T4.Z, T4.X, literal.x, 5723; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5724; EG-NEXT: BFE_INT T4.X, T4.X, 0.0, literal.x, 5725; EG-NEXT: LSHR * T5.X, KC0[2].Y, literal.y, 5726; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45) 5727; EG-NEXT: ASHR * T4.Y, PV.X, literal.x, 5728; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 5729; 5730; CM-LABEL: global_sextload_v2i16_to_v2i64: 5731; CM: ; %bb.0: 5732; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5733; CM-NEXT: TEX 0 @6 5734; CM-NEXT: ALU 8, @9, KC0[CB0:0-32], KC1[] 5735; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T4, T5.X 5736; CM-NEXT: CF_END 5737; CM-NEXT: PAD 5738; CM-NEXT: Fetch clause starting at 6: 5739; CM-NEXT: VTX_READ_32 T4.X, T4.X, 0, #1 5740; CM-NEXT: ALU clause starting at 8: 5741; CM-NEXT: MOV * T4.X, KC0[2].Z, 5742; CM-NEXT: ALU clause starting at 9: 5743; CM-NEXT: ASHR * T4.W, T4.X, literal.x, 5744; CM-NEXT: 31(4.344025e-44), 0(0.000000e+00) 5745; CM-NEXT: ASHR * T4.Z, T4.X, literal.x, 5746; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5747; CM-NEXT: BFE_INT * T4.X, T4.X, 0.0, literal.x, 5748; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5749; CM-NEXT: LSHR T5.X, KC0[2].Y, literal.x, 5750; CM-NEXT: ASHR * T4.Y, PV.X, literal.y, 5751; CM-NEXT: 2(2.802597e-45), 31(4.344025e-44) 5752 %load = load <2 x i16>, <2 x i16> addrspace(1)* %in 5753 %ext = sext <2 x i16> %load to <2 x i64> 5754 store <2 x i64> %ext, <2 x i64> addrspace(1)* %out 5755 ret void 5756} 5757 5758define amdgpu_kernel void @global_zextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x 
i16> addrspace(1)* %in) #0 { 5759; GCN-NOHSA-SI-LABEL: global_zextload_v4i16_to_v4i64: 5760; GCN-NOHSA-SI: ; %bb.0: 5761; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 5762; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 5763; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 5764; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 5765; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 5766; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5767; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 5768; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 5769; GCN-NOHSA-SI-NEXT: buffer_load_dwordx2 v[8:9], off, s[8:11], 0 5770; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0 5771; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v1 5772; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, v1 5773; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, v1 5774; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 5775; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 5776; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 5777; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v2, 16, v9 5778; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v6, 16, v8 5779; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v4, 0xffff, v8 5780; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v9 5781; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16 5782; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 5783; GCN-NOHSA-SI-NEXT: s_endpgm 5784; 5785; GCN-HSA-LABEL: global_zextload_v4i16_to_v4i64: 5786; GCN-HSA: ; %bb.0: 5787; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5788; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5789; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 5790; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 5791; GCN-HSA-NEXT: flat_load_dwordx2 v[8:9], v[0:1] 5792; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 5793; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5794; GCN-HSA-NEXT: v_mov_b32_e32 v1, 0 5795; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3 5796; GCN-HSA-NEXT: v_mov_b32_e32 v3, v1 5797; GCN-HSA-NEXT: v_mov_b32_e32 v11, s1 5798; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2 5799; GCN-HSA-NEXT: v_mov_b32_e32 v5, v1 5800; GCN-HSA-NEXT: v_mov_b32_e32 v7, v1 5801; GCN-HSA-NEXT: v_mov_b32_e32 v10, s0 5802; 
GCN-HSA-NEXT: s_waitcnt vmcnt(0) 5803; GCN-HSA-NEXT: v_lshrrev_b32_e32 v2, 16, v9 5804; GCN-HSA-NEXT: v_and_b32_e32 v0, 0xffff, v9 5805; GCN-HSA-NEXT: v_lshrrev_b32_e32 v6, 16, v8 5806; GCN-HSA-NEXT: v_and_b32_e32 v4, 0xffff, v8 5807; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[0:3] 5808; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[4:7] 5809; GCN-HSA-NEXT: s_endpgm 5810; 5811; GCN-NOHSA-VI-LABEL: global_zextload_v4i16_to_v4i64: 5812; GCN-NOHSA-VI: ; %bb.0: 5813; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 5814; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 5815; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 5816; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 5817; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 5818; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5819; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 5820; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 5821; GCN-NOHSA-VI-NEXT: buffer_load_dwordx2 v[8:9], off, s[8:11], 0 5822; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, 0 5823; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, v1 5824; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 5825; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 5826; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, v1 5827; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v7, v1 5828; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 5829; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v2, 16, v9 5830; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v9 5831; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v6, 16, v8 5832; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v4, 0xffff, v8 5833; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 offset:16 5834; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 5835; GCN-NOHSA-VI-NEXT: s_endpgm 5836; 5837; EG-LABEL: global_zextload_v4i16_to_v4i64: 5838; EG: ; %bb.0: 5839; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5840; EG-NEXT: TEX 0 @6 5841; EG-NEXT: ALU 18, @9, KC0[CB0:0-32], KC1[] 5842; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T8.X, 0 5843; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T7.X, 1 5844; EG-NEXT: CF_END 5845; EG-NEXT: Fetch clause starting at 6: 5846; EG-NEXT: 
VTX_READ_64 T5.XY, T5.X, 0, #1 5847; EG-NEXT: ALU clause starting at 8: 5848; EG-NEXT: MOV * T5.X, KC0[2].Z, 5849; EG-NEXT: ALU clause starting at 9: 5850; EG-NEXT: MOV T2.X, T5.X, 5851; EG-NEXT: MOV * T3.X, T5.Y, 5852; EG-NEXT: MOV T0.Y, PV.X, 5853; EG-NEXT: MOV * T0.Z, PS, 5854; EG-NEXT: LSHR * T5.Z, PV.Z, literal.x, 5855; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5856; EG-NEXT: AND_INT T5.X, T0.Z, literal.x, 5857; EG-NEXT: MOV T5.Y, 0.0, 5858; EG-NEXT: LSHR T6.Z, T0.Y, literal.y, 5859; EG-NEXT: AND_INT * T6.X, T0.Y, literal.x, 5860; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 5861; EG-NEXT: MOV T6.Y, 0.0, 5862; EG-NEXT: MOV T5.W, 0.0, 5863; EG-NEXT: MOV * T6.W, 0.0, 5864; EG-NEXT: LSHR T7.X, KC0[2].Y, literal.x, 5865; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 5866; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 5867; EG-NEXT: LSHR * T8.X, PV.W, literal.x, 5868; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5869; 5870; CM-LABEL: global_zextload_v4i16_to_v4i64: 5871; CM: ; %bb.0: 5872; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5873; CM-NEXT: TEX 0 @6 5874; CM-NEXT: ALU 20, @9, KC0[CB0:0-32], KC1[] 5875; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T5, T8.X 5876; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T6, T7.X 5877; CM-NEXT: CF_END 5878; CM-NEXT: Fetch clause starting at 6: 5879; CM-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 5880; CM-NEXT: ALU clause starting at 8: 5881; CM-NEXT: MOV * T5.X, KC0[2].Z, 5882; CM-NEXT: ALU clause starting at 9: 5883; CM-NEXT: MOV * T2.X, T5.X, 5884; CM-NEXT: MOV * T3.X, T5.Y, 5885; CM-NEXT: MOV T0.Y, PV.X, 5886; CM-NEXT: MOV * T0.Z, T2.X, 5887; CM-NEXT: LSHR * T5.Z, PV.Z, literal.x, 5888; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5889; CM-NEXT: AND_INT T5.X, T0.Z, literal.x, 5890; CM-NEXT: MOV T5.Y, 0.0, 5891; CM-NEXT: LSHR * T6.Z, T0.Y, literal.y, 5892; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 5893; CM-NEXT: AND_INT T6.X, T0.Y, literal.x, 5894; CM-NEXT: MOV T6.Y, 0.0, 5895; CM-NEXT: MOV * T5.W, 0.0, 5896; CM-NEXT: 65535(9.183409e-41), 
0(0.000000e+00) 5897; CM-NEXT: MOV * T6.W, 0.0, 5898; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 5899; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 5900; CM-NEXT: LSHR * T7.X, PV.W, literal.x, 5901; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5902; CM-NEXT: LSHR * T8.X, KC0[2].Y, literal.x, 5903; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 5904 %load = load <4 x i16>, <4 x i16> addrspace(1)* %in 5905 %ext = zext <4 x i16> %load to <4 x i64> 5906 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out 5907 ret void 5908} 5909 5910define amdgpu_kernel void @global_sextload_v4i16_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) #0 { 5911; GCN-NOHSA-SI-LABEL: global_sextload_v4i16_to_v4i64: 5912; GCN-NOHSA-SI: ; %bb.0: 5913; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 5914; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 5915; GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 5916; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 5917; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 5918; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 5919; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 5920; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 5921; GCN-NOHSA-SI-NEXT: buffer_load_dwordx2 v[1:2], off, s[8:11], 0 5922; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 5923; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 5924; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 5925; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v2 5926; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v5, 16, v1 5927; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v1, 0, 16 5928; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[6:7], v[1:2], 48 5929; GCN-NOHSA-SI-NEXT: v_bfe_i32 v4, v3, 0, 16 5930; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5931; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v5, 0, 16 5932; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v5, 31, v4 5933; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v3, 31, v2 5934; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16 5935; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 5936; GCN-NOHSA-SI-NEXT: s_endpgm 5937; 5938; GCN-HSA-LABEL: 
global_sextload_v4i16_to_v4i64: 5939; GCN-HSA: ; %bb.0: 5940; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 5941; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 5942; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 5943; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 5944; GCN-HSA-NEXT: flat_load_dwordx2 v[1:2], v[0:1] 5945; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 5946; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 5947; GCN-HSA-NEXT: v_mov_b32_e32 v11, s3 5948; GCN-HSA-NEXT: v_mov_b32_e32 v9, s1 5949; GCN-HSA-NEXT: v_mov_b32_e32 v10, s2 5950; GCN-HSA-NEXT: v_mov_b32_e32 v8, s0 5951; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 5952; GCN-HSA-NEXT: v_mov_b32_e32 v3, v2 5953; GCN-HSA-NEXT: v_lshrrev_b32_e32 v4, 16, v1 5954; GCN-HSA-NEXT: v_ashr_i64 v[6:7], v[1:2], 48 5955; GCN-HSA-NEXT: v_bfe_i32 v2, v4, 0, 16 5956; GCN-HSA-NEXT: v_bfe_i32 v4, v3, 0, 16 5957; GCN-HSA-NEXT: v_bfe_i32 v0, v1, 0, 16 5958; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 31, v4 5959; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5960; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 31, v2 5961; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[4:7] 5962; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[0:3] 5963; GCN-HSA-NEXT: s_endpgm 5964; 5965; GCN-NOHSA-VI-LABEL: global_sextload_v4i16_to_v4i64: 5966; GCN-NOHSA-VI: ; %bb.0: 5967; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 5968; GCN-NOHSA-VI-NEXT: s_mov_b32 s7, 0xf000 5969; GCN-NOHSA-VI-NEXT: s_mov_b32 s6, -1 5970; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s6 5971; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s7 5972; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 5973; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s2 5974; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s3 5975; GCN-NOHSA-VI-NEXT: buffer_load_dwordx2 v[1:2], off, s[8:11], 0 5976; GCN-NOHSA-VI-NEXT: s_mov_b32 s4, s0 5977; GCN-NOHSA-VI-NEXT: s_mov_b32 s5, s1 5978; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 5979; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, v2 5980; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v5, 16, v2 5981; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v1 5982; GCN-NOHSA-VI-NEXT: v_bfe_i32 v4, v4, 0, 16 5983; 
GCN-NOHSA-VI-NEXT: v_bfe_i32 v6, v5, 0, 16 5984; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v1, 0, 16 5985; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v3, 0, 16 5986; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v5, 31, v4 5987; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 31, v6 5988; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 5989; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 31, v2 5990; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16 5991; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 5992; GCN-NOHSA-VI-NEXT: s_endpgm 5993; 5994; EG-LABEL: global_sextload_v4i16_to_v4i64: 5995; EG: ; %bb.0: 5996; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 5997; EG-NEXT: TEX 0 @6 5998; EG-NEXT: ALU 20, @9, KC0[CB0:0-32], KC1[] 5999; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T8.X, 0 6000; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T5.XYZW, T6.X, 1 6001; EG-NEXT: CF_END 6002; EG-NEXT: Fetch clause starting at 6: 6003; EG-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 6004; EG-NEXT: ALU clause starting at 8: 6005; EG-NEXT: MOV * T5.X, KC0[2].Z, 6006; EG-NEXT: ALU clause starting at 9: 6007; EG-NEXT: MOV T2.X, T5.X, 6008; EG-NEXT: MOV * T3.X, T5.Y, 6009; EG-NEXT: MOV T0.Y, PS, 6010; EG-NEXT: MOV * T0.Z, PV.X, 6011; EG-NEXT: ASHR * T5.W, PV.Z, literal.x, 6012; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 6013; EG-NEXT: LSHR T6.X, KC0[2].Y, literal.x, 6014; EG-NEXT: ASHR T5.Z, T0.Z, literal.y, 6015; EG-NEXT: ASHR * T7.W, T0.Y, literal.z, 6016; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6017; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 6018; EG-NEXT: BFE_INT T5.X, T0.Z, 0.0, literal.x, 6019; EG-NEXT: ASHR * T7.Z, T0.Y, literal.x, 6020; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6021; EG-NEXT: BFE_INT T7.X, T0.Y, 0.0, literal.x, 6022; EG-NEXT: ASHR T5.Y, PV.X, literal.y, 6023; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 6024; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6025; EG-NEXT: LSHR T8.X, PV.W, literal.x, 6026; EG-NEXT: ASHR * T7.Y, PV.X, literal.y, 6027; EG-NEXT: 2(2.802597e-45), 
31(4.344025e-44) 6028; 6029; CM-LABEL: global_sextload_v4i16_to_v4i64: 6030; CM: ; %bb.0: 6031; CM-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 6032; CM-NEXT: TEX 0 @6 6033; CM-NEXT: ALU 20, @9, KC0[CB0:0-32], KC1[] 6034; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T7, T8.X 6035; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T5, T6.X 6036; CM-NEXT: CF_END 6037; CM-NEXT: Fetch clause starting at 6: 6038; CM-NEXT: VTX_READ_64 T5.XY, T5.X, 0, #1 6039; CM-NEXT: ALU clause starting at 8: 6040; CM-NEXT: MOV * T5.X, KC0[2].Z, 6041; CM-NEXT: ALU clause starting at 9: 6042; CM-NEXT: MOV * T2.X, T5.X, 6043; CM-NEXT: MOV T3.X, T5.Y, 6044; CM-NEXT: MOV * T0.Y, PV.X, 6045; CM-NEXT: MOV * T0.Z, PV.X, 6046; CM-NEXT: ADD_INT T1.Z, KC0[2].Y, literal.x, 6047; CM-NEXT: ASHR * T5.W, PV.Z, literal.y, 6048; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6049; CM-NEXT: LSHR T6.X, PV.Z, literal.x, 6050; CM-NEXT: ASHR T5.Z, T0.Z, literal.y, 6051; CM-NEXT: ASHR * T7.W, T0.Y, literal.z, 6052; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6053; CM-NEXT: 31(4.344025e-44), 0(0.000000e+00) 6054; CM-NEXT: BFE_INT T5.X, T0.Z, 0.0, literal.x, 6055; CM-NEXT: ASHR * T7.Z, T0.Y, literal.x, 6056; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6057; CM-NEXT: BFE_INT T7.X, T0.Y, 0.0, literal.x, 6058; CM-NEXT: ASHR * T5.Y, PV.X, literal.y, 6059; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6060; CM-NEXT: LSHR T8.X, KC0[2].Y, literal.x, 6061; CM-NEXT: ASHR * T7.Y, PV.X, literal.y, 6062; CM-NEXT: 2(2.802597e-45), 31(4.344025e-44) 6063 %load = load <4 x i16>, <4 x i16> addrspace(1)* %in 6064 %ext = sext <4 x i16> %load to <4 x i64> 6065 store <4 x i64> %ext, <4 x i64> addrspace(1)* %out 6066 ret void 6067} 6068 6069define amdgpu_kernel void @global_zextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 { 6070; GCN-NOHSA-SI-LABEL: global_zextload_v8i16_to_v8i64: 6071; GCN-NOHSA-SI: ; %bb.0: 6072; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 6073; GCN-NOHSA-SI-NEXT: s_mov_b32 s7, 0xf000 6074; 
GCN-NOHSA-SI-NEXT: s_mov_b32 s6, -1 6075; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s6 6076; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s7 6077; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 6078; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s2 6079; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s3 6080; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 6081; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, 0 6082; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, 0 6083; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, 0 6084; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, v9 6085; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, v9 6086; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v19, v9 6087; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, v9 6088; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, v9 6089; GCN-NOHSA-SI-NEXT: s_mov_b32 s4, s0 6090; GCN-NOHSA-SI-NEXT: s_mov_b32 s5, s1 6091; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 6092; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v18, 16, v1 6093; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v10, 16, v3 6094; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v14, 16, v2 6095; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v6, 16, v0 6096; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v4, 0xffff, v0 6097; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v12, 0xffff, v2 6098; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v16, 0xffff, v1 6099; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v8, 0xffff, v3 6100; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[4:7], 0 offset:48 6101; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[4:7], 0 offset:16 6102; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[4:7], 0 offset:32 6103; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 6104; GCN-NOHSA-SI-NEXT: s_endpgm 6105; 6106; GCN-HSA-LABEL: global_zextload_v8i16_to_v8i64: 6107; GCN-HSA: ; %bb.0: 6108; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 6109; GCN-HSA-NEXT: v_mov_b32_e32 v12, 0 6110; GCN-HSA-NEXT: v_mov_b32_e32 v14, v12 6111; GCN-HSA-NEXT: v_mov_b32_e32 v15, v12 6112; GCN-HSA-NEXT: v_mov_b32_e32 v8, v12 6113; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 6114; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 6115; 
GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 6116; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 6117; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 6118; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6119; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 6120; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 6121; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 6122; GCN-HSA-NEXT: v_mov_b32_e32 v17, s1 6123; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6124; GCN-HSA-NEXT: v_mov_b32_e32 v16, s0 6125; GCN-HSA-NEXT: v_mov_b32_e32 v19, s3 6126; GCN-HSA-NEXT: s_add_u32 s0, s0, 32 6127; GCN-HSA-NEXT: v_mov_b32_e32 v18, s2 6128; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 6129; GCN-HSA-NEXT: v_mov_b32_e32 v6, 0 6130; GCN-HSA-NEXT: v_mov_b32_e32 v10, 0 6131; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 6132; GCN-HSA-NEXT: v_lshrrev_b32_e32 v13, 16, v3 6133; GCN-HSA-NEXT: v_and_b32_e32 v11, 0xffff, v3 6134; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[11:14] 6135; GCN-HSA-NEXT: v_mov_b32_e32 v4, v12 6136; GCN-HSA-NEXT: v_mov_b32_e32 v13, v12 6137; GCN-HSA-NEXT: v_lshrrev_b32_e32 v14, 16, v1 6138; GCN-HSA-NEXT: v_lshrrev_b32_e32 v5, 16, v0 6139; GCN-HSA-NEXT: v_and_b32_e32 v3, 0xffff, v0 6140; GCN-HSA-NEXT: v_and_b32_e32 v12, 0xffff, v1 6141; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 6142; GCN-HSA-NEXT: v_lshrrev_b32_e32 v9, 16, v2 6143; GCN-HSA-NEXT: v_and_b32_e32 v7, 0xffff, v2 6144; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 6145; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[12:15] 6146; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[7:10] 6147; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[3:6] 6148; GCN-HSA-NEXT: s_endpgm 6149; 6150; GCN-NOHSA-VI-LABEL: global_zextload_v8i16_to_v8i64: 6151; GCN-NOHSA-VI: ; %bb.0: 6152; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 6153; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 6154; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 6155; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 6156; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 6157; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 6158; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 6159; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 6160; 
GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 6161; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v19, 0 6162; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v17, 0 6163; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 6164; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 6165; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v7, 0 6166; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v11, 0 6167; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v15, 0 6168; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v13, v17 6169; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v9, v17 6170; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, v17 6171; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 6172; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v16, 0xffff, v3 6173; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v18, 16, v3 6174; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v4, 0xffff, v0 6175; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v6, 16, v0 6176; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v8, 0xffff, v1 6177; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v10, 16, v1 6178; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v12, 0xffff, v2 6179; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v14, 16, v2 6180; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48 6181; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:32 6182; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16 6183; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 6184; GCN-NOHSA-VI-NEXT: s_endpgm 6185; 6186; EG-LABEL: global_zextload_v8i16_to_v8i64: 6187; EG: ; %bb.0: 6188; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] 6189; EG-NEXT: TEX 0 @8 6190; EG-NEXT: ALU 30, @11, KC0[CB0:0-32], KC1[] 6191; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T8.XYZW, T14.X, 0 6192; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T9.XYZW, T13.X, 0 6193; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T12.X, 0 6194; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T7.XYZW, T11.X, 1 6195; EG-NEXT: CF_END 6196; EG-NEXT: Fetch clause starting at 8: 6197; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 6198; EG-NEXT: ALU clause starting at 10: 6199; EG-NEXT: MOV * T7.X, KC0[2].Z, 6200; EG-NEXT: ALU 
clause starting at 11: 6201; EG-NEXT: LSHR * T8.Z, T7.W, literal.x, 6202; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6203; EG-NEXT: AND_INT T8.X, T7.W, literal.x, 6204; EG-NEXT: MOV T8.Y, 0.0, 6205; EG-NEXT: LSHR T9.Z, T7.Z, literal.y, 6206; EG-NEXT: AND_INT * T9.X, T7.Z, literal.x, 6207; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6208; EG-NEXT: MOV T9.Y, 0.0, 6209; EG-NEXT: LSHR * T10.Z, T7.Y, literal.x, 6210; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6211; EG-NEXT: AND_INT T10.X, T7.Y, literal.x, 6212; EG-NEXT: MOV T10.Y, 0.0, 6213; EG-NEXT: LSHR T7.Z, T7.X, literal.y, 6214; EG-NEXT: AND_INT * T7.X, T7.X, literal.x, 6215; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6216; EG-NEXT: MOV T7.Y, 0.0, 6217; EG-NEXT: MOV T8.W, 0.0, 6218; EG-NEXT: MOV * T9.W, 0.0, 6219; EG-NEXT: MOV T10.W, 0.0, 6220; EG-NEXT: MOV * T7.W, 0.0, 6221; EG-NEXT: LSHR T11.X, KC0[2].Y, literal.x, 6222; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6223; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6224; EG-NEXT: LSHR T12.X, PV.W, literal.x, 6225; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6226; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 6227; EG-NEXT: LSHR T13.X, PV.W, literal.x, 6228; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6229; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 6230; EG-NEXT: LSHR * T14.X, PV.W, literal.x, 6231; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 6232; 6233; CM-LABEL: global_zextload_v8i16_to_v8i64: 6234; CM: ; %bb.0: 6235; CM-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] 6236; CM-NEXT: TEX 0 @8 6237; CM-NEXT: ALU 32, @11, KC0[CB0:0-32], KC1[] 6238; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T8, T14.X 6239; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T9, T13.X 6240; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T10, T12.X 6241; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T7, T11.X 6242; CM-NEXT: CF_END 6243; CM-NEXT: Fetch clause starting at 8: 6244; CM-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 6245; CM-NEXT: ALU clause starting at 10: 6246; CM-NEXT: MOV * T7.X, KC0[2].Z, 6247; CM-NEXT: ALU 
clause starting at 11: 6248; CM-NEXT: LSHR * T8.Z, T7.X, literal.x, 6249; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6250; CM-NEXT: AND_INT T8.X, T7.X, literal.x, 6251; CM-NEXT: MOV T8.Y, 0.0, 6252; CM-NEXT: LSHR * T9.Z, T7.Y, literal.y, 6253; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6254; CM-NEXT: AND_INT T9.X, T7.Y, literal.x, 6255; CM-NEXT: MOV T9.Y, 0.0, 6256; CM-NEXT: LSHR * T10.Z, T7.Z, literal.y, 6257; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6258; CM-NEXT: AND_INT T10.X, T7.Z, literal.x, 6259; CM-NEXT: MOV T10.Y, 0.0, 6260; CM-NEXT: LSHR * T7.Z, T7.W, literal.y, 6261; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6262; CM-NEXT: AND_INT T7.X, T7.W, literal.x, 6263; CM-NEXT: MOV T7.Y, 0.0, 6264; CM-NEXT: MOV * T8.W, 0.0, 6265; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 6266; CM-NEXT: MOV * T9.W, 0.0, 6267; CM-NEXT: MOV * T10.W, 0.0, 6268; CM-NEXT: MOV * T7.W, 0.0, 6269; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 6270; CM-NEXT: 48(6.726233e-44), 0(0.000000e+00) 6271; CM-NEXT: LSHR T11.X, PV.W, literal.x, 6272; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6273; CM-NEXT: 2(2.802597e-45), 32(4.484155e-44) 6274; CM-NEXT: LSHR T12.X, PV.W, literal.x, 6275; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6276; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6277; CM-NEXT: LSHR * T13.X, PV.W, literal.x, 6278; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 6279; CM-NEXT: LSHR * T14.X, KC0[2].Y, literal.x, 6280; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 6281 %load = load <8 x i16>, <8 x i16> addrspace(1)* %in 6282 %ext = zext <8 x i16> %load to <8 x i64> 6283 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out 6284 ret void 6285} 6286 6287define amdgpu_kernel void @global_sextload_v8i16_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 { 6288; GCN-NOHSA-SI-LABEL: global_sextload_v8i16_to_v8i64: 6289; GCN-NOHSA-SI: ; %bb.0: 6290; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 6291; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 6292; 
GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 6293; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2 6294; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3 6295; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 6296; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6 6297; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7 6298; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 6299; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4 6300; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5 6301; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 6302; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, v3 6303; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v7, 16, v2 6304; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v6, 16, v0 6305; GCN-NOHSA-SI-NEXT: v_bfe_i32 v4, v0, 0, 16 6306; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[10:11], v[0:1], 48 6307; GCN-NOHSA-SI-NEXT: v_bfe_i32 v12, v5, 0, 16 6308; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[14:15], v[2:3], 48 6309; GCN-NOHSA-SI-NEXT: v_bfe_i32 v8, v1, 0, 16 6310; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v2, 0, 16 6311; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v5, 31, v4 6312; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 6313; GCN-NOHSA-SI-NEXT: v_bfe_i32 v6, v6, 0, 16 6314; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v7, 0, 16 6315; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v9, 31, v8 6316; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v13, 31, v12 6317; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v7, 31, v6 6318; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v3, 31, v2 6319; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:48 6320; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16 6321; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 6322; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 6323; GCN-NOHSA-SI-NEXT: s_endpgm 6324; 6325; GCN-HSA-LABEL: global_sextload_v8i16_to_v8i64: 6326; GCN-HSA: ; %bb.0: 6327; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 6328; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 6329; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 6330; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 6331; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 
6332; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 6333; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6334; GCN-HSA-NEXT: v_mov_b32_e32 v15, s3 6335; GCN-HSA-NEXT: v_mov_b32_e32 v14, s2 6336; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 6337; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6338; GCN-HSA-NEXT: v_mov_b32_e32 v13, s1 6339; GCN-HSA-NEXT: v_mov_b32_e32 v9, s3 6340; GCN-HSA-NEXT: v_mov_b32_e32 v12, s0 6341; GCN-HSA-NEXT: v_mov_b32_e32 v8, s2 6342; GCN-HSA-NEXT: s_add_u32 s0, s0, 32 6343; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 6344; GCN-HSA-NEXT: v_mov_b32_e32 v17, s1 6345; GCN-HSA-NEXT: v_mov_b32_e32 v16, s0 6346; GCN-HSA-NEXT: s_waitcnt vmcnt(0) 6347; GCN-HSA-NEXT: v_ashr_i64 v[6:7], v[0:1], 48 6348; GCN-HSA-NEXT: v_bfe_i32 v4, v1, 0, 16 6349; GCN-HSA-NEXT: v_mov_b32_e32 v11, v3 6350; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 31, v4 6351; GCN-HSA-NEXT: v_lshrrev_b32_e32 v10, 16, v2 6352; GCN-HSA-NEXT: flat_store_dwordx4 v[8:9], v[4:7] 6353; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v0 6354; GCN-HSA-NEXT: v_bfe_i32 v4, v0, 0, 16 6355; GCN-HSA-NEXT: v_bfe_i32 v8, v2, 0, 16 6356; GCN-HSA-NEXT: v_ashr_i64 v[2:3], v[2:3], 48 6357; GCN-HSA-NEXT: v_bfe_i32 v0, v11, 0, 16 6358; GCN-HSA-NEXT: v_bfe_i32 v6, v1, 0, 16 6359; GCN-HSA-NEXT: v_bfe_i32 v10, v10, 0, 16 6360; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 6361; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 31, v4 6362; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v8 6363; GCN-HSA-NEXT: v_ashrrev_i32_e32 v7, 31, v6 6364; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 31, v10 6365; GCN-HSA-NEXT: flat_store_dwordx4 v[14:15], v[0:3] 6366; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[8:11] 6367; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[4:7] 6368; GCN-HSA-NEXT: s_endpgm 6369; 6370; GCN-NOHSA-VI-LABEL: global_sextload_v8i16_to_v8i64: 6371; GCN-NOHSA-VI: ; %bb.0: 6372; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 6373; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 6374; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 6375; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 6376; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, 
s3 6377; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 6378; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 6379; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 6380; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 6381; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 6382; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 6383; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 6384; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v11, v3 6385; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v3 6386; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v5, 16, v0 6387; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v7, 16, v1 6388; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v10, 16, v2 6389; GCN-NOHSA-VI-NEXT: v_bfe_i32 v12, v11, 0, 16 6390; GCN-NOHSA-VI-NEXT: v_bfe_i32 v14, v3, 0, 16 6391; GCN-NOHSA-VI-NEXT: v_bfe_i32 v4, v0, 0, 16 6392; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v1, 0, 16 6393; GCN-NOHSA-VI-NEXT: v_bfe_i32 v6, v5, 0, 16 6394; GCN-NOHSA-VI-NEXT: v_bfe_i32 v8, v2, 0, 16 6395; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v7, 0, 16 6396; GCN-NOHSA-VI-NEXT: v_bfe_i32 v10, v10, 0, 16 6397; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v13, 31, v12 6398; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v15, 31, v14 6399; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v5, 31, v4 6400; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 6401; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v9, 31, v8 6402; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 31, v6 6403; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 31, v2 6404; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v11, 31, v10 6405; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:48 6406; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:32 6407; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 6408; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 6409; GCN-NOHSA-VI-NEXT: s_endpgm 6410; 6411; EG-LABEL: global_sextload_v8i16_to_v8i64: 6412; EG: ; %bb.0: 6413; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] 6414; EG-NEXT: TEX 0 @8 6415; EG-NEXT: ALU 33, @11, KC0[CB0:0-32], KC1[] 6416; EG-NEXT: MEM_RAT_CACHELESS 
STORE_RAW T14.XYZW, T7.X, 0 6417; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T11.X, 0 6418; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T9.X, 0 6419; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XYZW, T8.X, 1 6420; EG-NEXT: CF_END 6421; EG-NEXT: Fetch clause starting at 8: 6422; EG-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 6423; EG-NEXT: ALU clause starting at 10: 6424; EG-NEXT: MOV * T7.X, KC0[2].Z, 6425; EG-NEXT: ALU clause starting at 11: 6426; EG-NEXT: LSHR T8.X, KC0[2].Y, literal.x, 6427; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6428; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6429; EG-NEXT: LSHR T9.X, PV.W, literal.x, 6430; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.y, 6431; EG-NEXT: ASHR * T10.W, T7.X, literal.z, 6432; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 6433; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 6434; EG-NEXT: LSHR T11.X, PV.W, literal.x, 6435; EG-NEXT: ASHR T10.Z, T7.X, literal.y, 6436; EG-NEXT: ASHR * T12.W, T7.Y, literal.z, 6437; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6438; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 6439; EG-NEXT: BFE_INT T10.X, T7.X, 0.0, literal.x, 6440; EG-NEXT: ASHR T12.Z, T7.Y, literal.x, 6441; EG-NEXT: ASHR * T13.W, T7.Z, literal.y, 6442; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6443; EG-NEXT: BFE_INT T12.X, T7.Y, 0.0, literal.x, 6444; EG-NEXT: ASHR T10.Y, PV.X, literal.y, 6445; EG-NEXT: ASHR T13.Z, T7.Z, literal.x, 6446; EG-NEXT: ASHR * T14.W, T7.W, literal.y, 6447; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6448; EG-NEXT: BFE_INT T13.X, T7.Z, 0.0, literal.x, 6449; EG-NEXT: ASHR T12.Y, PV.X, literal.y, 6450; EG-NEXT: ASHR * T14.Z, T7.W, literal.x, 6451; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6452; EG-NEXT: BFE_INT T14.X, T7.W, 0.0, literal.x, 6453; EG-NEXT: ASHR T13.Y, PV.X, literal.y, 6454; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 6455; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6456; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 6457; EG-NEXT: LSHR T7.X, PV.W, literal.x, 6458; EG-NEXT: ASHR * 
T14.Y, PV.X, literal.y, 6459; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) 6460; 6461; CM-LABEL: global_sextload_v8i16_to_v8i64: 6462; CM: ; %bb.0: 6463; CM-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] 6464; CM-NEXT: TEX 0 @8 6465; CM-NEXT: ALU 33, @11, KC0[CB0:0-32], KC1[] 6466; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T7, T14.X 6467; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T13, T11.X 6468; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T12, T9.X 6469; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T10, T8.X 6470; CM-NEXT: CF_END 6471; CM-NEXT: Fetch clause starting at 8: 6472; CM-NEXT: VTX_READ_128 T7.XYZW, T7.X, 0, #1 6473; CM-NEXT: ALU clause starting at 10: 6474; CM-NEXT: MOV * T7.X, KC0[2].Z, 6475; CM-NEXT: ALU clause starting at 11: 6476; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 6477; CM-NEXT: 48(6.726233e-44), 0(0.000000e+00) 6478; CM-NEXT: LSHR T8.X, PV.W, literal.x, 6479; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6480; CM-NEXT: 2(2.802597e-45), 32(4.484155e-44) 6481; CM-NEXT: LSHR T9.X, PV.W, literal.x, 6482; CM-NEXT: ADD_INT T0.Z, KC0[2].Y, literal.y, 6483; CM-NEXT: ASHR * T10.W, T7.W, literal.z, 6484; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6485; CM-NEXT: 31(4.344025e-44), 0(0.000000e+00) 6486; CM-NEXT: LSHR T11.X, PV.Z, literal.x, 6487; CM-NEXT: ASHR T10.Z, T7.W, literal.y, 6488; CM-NEXT: ASHR * T12.W, T7.Z, literal.z, 6489; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6490; CM-NEXT: 31(4.344025e-44), 0(0.000000e+00) 6491; CM-NEXT: BFE_INT T10.X, T7.W, 0.0, literal.x, 6492; CM-NEXT: ASHR T12.Z, T7.Z, literal.x, 6493; CM-NEXT: ASHR * T13.W, T7.Y, literal.y, 6494; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6495; CM-NEXT: BFE_INT T12.X, T7.Z, 0.0, literal.x, 6496; CM-NEXT: ASHR T10.Y, PV.X, literal.y, 6497; CM-NEXT: ASHR T13.Z, T7.Y, literal.x, 6498; CM-NEXT: ASHR * T7.W, T7.X, literal.y, 6499; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6500; CM-NEXT: BFE_INT T13.X, T7.Y, 0.0, literal.x, 6501; CM-NEXT: ASHR T12.Y, PV.X, literal.y, 6502; CM-NEXT: ASHR * T7.Z, T7.X, 
literal.x, 6503; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6504; CM-NEXT: BFE_INT T7.X, T7.X, 0.0, literal.x, 6505; CM-NEXT: ASHR * T13.Y, PV.X, literal.y, 6506; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 6507; CM-NEXT: LSHR T14.X, KC0[2].Y, literal.x, 6508; CM-NEXT: ASHR * T7.Y, PV.X, literal.y, 6509; CM-NEXT: 2(2.802597e-45), 31(4.344025e-44) 6510 %load = load <8 x i16>, <8 x i16> addrspace(1)* %in 6511 %ext = sext <8 x i16> %load to <8 x i64> 6512 store <8 x i64> %ext, <8 x i64> addrspace(1)* %out 6513 ret void 6514} 6515 ; Test global_zextload_v16i16_to_v16i64: loads a <16 x i16> vector through the addrspace(1) pointer %in, zero-extends every lane to i64 (zext) and stores the resulting <16 x i64> through the addrspace(1) pointer %out. The GCN-NOHSA-SI, GCN-HSA, GCN-NOHSA-VI, EG and CM check lines inside the function are autogenerated by utils/update_llc_test_checks.py (see the NOTE at the top of the file) and should be regenerated with that script rather than edited by hand. 6516define amdgpu_kernel void @global_zextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 { 6517; GCN-NOHSA-SI-LABEL: global_zextload_v16i16_to_v16i64: 6518; GCN-NOHSA-SI: ; %bb.0: 6519; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 6520; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 6521; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 6522; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2 6523; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3 6524; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 6525; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6 6526; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7 6527; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 6528; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 6529; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(1) 6530; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v10, 16, v1 6531; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v14, 16, v2 6532; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v18, 16, v0 6533; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v16, 0xffff, v0 6534; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v12, 0xffff, v2 6535; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v8, 0xffff, v1 6536; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v2, 16, v3 6537; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v3 6538; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v20, 0 6539; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 6540; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v21, 16, v5 6541; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v25, 16, v6 6542; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v23, 0xffff, 
v6 6543; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v6, 16, v4 6544; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v4, 0xffff, v4 6545; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v28, 16, v7 6546; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v26, 0xffff, v7 6547; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v19, 0xffff, v5 6548; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v22, v20 6549; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v27, v20 6550; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v29, v20 6551; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, v20 6552; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v20 6553; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v9, v20 6554; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, v20 6555; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, v20 6556; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v24, v20 6557; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, v20 6558; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v17, v20 6559; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4 6560; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5 6561; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[19:22], off, s[0:3], 0 offset:80 6562; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[26:29], off, s[0:3], 0 offset:112 6563; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(1) 6564; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v19, 0 6565; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v15, 0 6566; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6567; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v26, 0 6568; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, 0 6569; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 6570; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16 6571; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:64 6572; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[23:26], off, s[0:3], 0 offset:96 6573; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:32 6574; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 6575; GCN-NOHSA-SI-NEXT: s_endpgm 6576; 6577; GCN-HSA-LABEL: global_zextload_v16i16_to_v16i64: 6578; GCN-HSA: ; %bb.0: 6579; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 6580; GCN-HSA-NEXT: v_mov_b32_e32 
v8, 0 6581; GCN-HSA-NEXT: v_mov_b32_e32 v10, v8 6582; GCN-HSA-NEXT: v_mov_b32_e32 v12, v8 6583; GCN-HSA-NEXT: v_mov_b32_e32 v15, 0 6584; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 6585; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 6586; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 6587; GCN-HSA-NEXT: s_add_u32 s2, s2, 16 6588; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 6589; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0 6590; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 6591; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 6592; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] 6593; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 6594; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6595; GCN-HSA-NEXT: s_add_u32 s4, s0, 16 6596; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 6597; GCN-HSA-NEXT: v_mov_b32_e32 v14, s5 6598; GCN-HSA-NEXT: v_mov_b32_e32 v13, s4 6599; GCN-HSA-NEXT: s_add_u32 s4, s0, 0x50 6600; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 6601; GCN-HSA-NEXT: s_waitcnt vmcnt(1) 6602; GCN-HSA-NEXT: v_lshrrev_b32_e32 v11, 16, v1 6603; GCN-HSA-NEXT: v_and_b32_e32 v9, 0xffff, v1 6604; GCN-HSA-NEXT: flat_store_dwordx4 v[13:14], v[9:12] 6605; GCN-HSA-NEXT: v_mov_b32_e32 v14, s5 6606; GCN-HSA-NEXT: v_mov_b32_e32 v13, s4 6607; GCN-HSA-NEXT: s_add_u32 s4, s0, 0x70 6608; GCN-HSA-NEXT: s_waitcnt vmcnt(1) 6609; GCN-HSA-NEXT: v_lshrrev_b32_e32 v11, 16, v5 6610; GCN-HSA-NEXT: v_and_b32_e32 v9, 0xffff, v5 6611; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 6612; GCN-HSA-NEXT: flat_store_dwordx4 v[13:14], v[9:12] 6613; GCN-HSA-NEXT: v_mov_b32_e32 v14, s5 6614; GCN-HSA-NEXT: v_mov_b32_e32 v13, s4 6615; GCN-HSA-NEXT: s_add_u32 s4, s0, 32 6616; GCN-HSA-NEXT: v_lshrrev_b32_e32 v11, 16, v7 6617; GCN-HSA-NEXT: v_and_b32_e32 v9, 0xffff, v7 6618; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 6619; GCN-HSA-NEXT: flat_store_dwordx4 v[13:14], v[9:12] 6620; GCN-HSA-NEXT: v_mov_b32_e32 v14, s5 6621; GCN-HSA-NEXT: v_lshrrev_b32_e32 v11, 16, v2 6622; GCN-HSA-NEXT: v_and_b32_e32 v9, 0xffff, v2 6623; GCN-HSA-NEXT: v_mov_b32_e32 v1, s2 6624; GCN-HSA-NEXT: v_mov_b32_e32 v12, 0 6625; GCN-HSA-NEXT: v_mov_b32_e32 v13, 
s4 6626; GCN-HSA-NEXT: v_mov_b32_e32 v2, s3 6627; GCN-HSA-NEXT: s_add_u32 s2, s0, 64 6628; GCN-HSA-NEXT: flat_store_dwordx4 v[13:14], v[9:12] 6629; GCN-HSA-NEXT: v_lshrrev_b32_e32 v14, 16, v4 6630; GCN-HSA-NEXT: v_and_b32_e32 v12, 0xffff, v4 6631; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 6632; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6633; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 6634; GCN-HSA-NEXT: s_add_u32 s0, s0, 0x60 6635; GCN-HSA-NEXT: v_lshrrev_b32_e32 v9, 16, v3 6636; GCN-HSA-NEXT: v_and_b32_e32 v7, 0xffff, v3 6637; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 6638; GCN-HSA-NEXT: flat_store_dwordx4 v[1:2], v[7:10] 6639; GCN-HSA-NEXT: v_mov_b32_e32 v13, v8 6640; GCN-HSA-NEXT: v_mov_b32_e32 v9, v8 6641; GCN-HSA-NEXT: v_mov_b32_e32 v1, v8 6642; GCN-HSA-NEXT: v_mov_b32_e32 v3, 0 6643; GCN-HSA-NEXT: v_lshrrev_b32_e32 v10, 16, v6 6644; GCN-HSA-NEXT: v_and_b32_e32 v8, 0xffff, v6 6645; GCN-HSA-NEXT: v_mov_b32_e32 v7, s3 6646; GCN-HSA-NEXT: v_lshrrev_b32_e32 v2, 16, v0 6647; GCN-HSA-NEXT: v_and_b32_e32 v0, 0xffff, v0 6648; GCN-HSA-NEXT: v_mov_b32_e32 v17, s1 6649; GCN-HSA-NEXT: v_mov_b32_e32 v11, 0 6650; GCN-HSA-NEXT: v_mov_b32_e32 v6, s2 6651; GCN-HSA-NEXT: v_mov_b32_e32 v16, s0 6652; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 6653; GCN-HSA-NEXT: flat_store_dwordx4 v[6:7], v[12:15] 6654; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[8:11] 6655; GCN-HSA-NEXT: s_endpgm 6656; 6657; GCN-NOHSA-VI-LABEL: global_zextload_v16i16_to_v16i64: 6658; GCN-NOHSA-VI: ; %bb.0: 6659; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 6660; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 6661; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 6662; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 6663; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 6664; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 6665; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 6666; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 6667; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 6668; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 6669; GCN-NOHSA-VI-NEXT: 
s_mov_b32 s0, s4 6670; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 6671; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v30, 0 6672; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v11, 0 6673; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v15, 0 6674; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v19, 0 6675; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v23, 0 6676; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1) 6677; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v16, 0xffff, v3 6678; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 6679; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v27, 0xffff, v4 6680; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v29, 16, v4 6681; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v4, 0 6682; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v28, v4 6683; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v18, 16, v3 6684; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v20, 0xffff, v6 6685; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v22, 16, v6 6686; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v24, 0xffff, v7 6687; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v26, 16, v7 6688; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v3, 0xffff, v5 6689; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v5, 16, v5 6690; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v6, 0 6691; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v25, v4 6692; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[27:30], off, s[0:3], 0 offset:64 6693; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v8, 0xffff, v0 6694; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v27, 0 6695; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v10, 16, v0 6696; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v12, 0xffff, v1 6697; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v14, 16, v1 6698; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v2 6699; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 6700; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v21, v4 6701; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v17, v4 6702; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, v4 6703; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v13, v4 6704; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v9, v4 6705; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[3:6], off, s[0:3], 0 offset:80 6706; GCN-NOHSA-VI-NEXT: s_nop 0 6707; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, 0 6708; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 
v[24:27], off, s[0:3], 0 offset:112 6709; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:96 6710; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48 6711; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 6712; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:16 6713; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 6714; GCN-NOHSA-VI-NEXT: s_endpgm 6715; 6716; EG-LABEL: global_zextload_v16i16_to_v16i64: 6717; EG: ; %bb.0: 6718; EG-NEXT: ALU 0, @16, KC0[CB0:0-32], KC1[] 6719; EG-NEXT: TEX 1 @12 6720; EG-NEXT: ALU 62, @17, KC0[CB0:0-32], KC1[] 6721; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T13.XYZW, T26.X, 0 6722; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T14.XYZW, T25.X, 0 6723; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T15.XYZW, T24.X, 0 6724; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T12.XYZW, T23.X, 0 6725; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T16.XYZW, T22.X, 0 6726; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T17.XYZW, T21.X, 0 6727; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T18.XYZW, T20.X, 0 6728; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T19.X, 1 6729; EG-NEXT: CF_END 6730; EG-NEXT: Fetch clause starting at 12: 6731; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1 6732; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1 6733; EG-NEXT: ALU clause starting at 16: 6734; EG-NEXT: MOV * T11.X, KC0[2].Z, 6735; EG-NEXT: ALU clause starting at 17: 6736; EG-NEXT: LSHR * T13.Z, T12.W, literal.x, 6737; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6738; EG-NEXT: AND_INT T13.X, T12.W, literal.x, 6739; EG-NEXT: MOV T13.Y, 0.0, 6740; EG-NEXT: LSHR T14.Z, T12.Z, literal.y, 6741; EG-NEXT: AND_INT * T14.X, T12.Z, literal.x, 6742; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6743; EG-NEXT: MOV T14.Y, 0.0, 6744; EG-NEXT: LSHR * T15.Z, T12.Y, literal.x, 6745; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6746; EG-NEXT: AND_INT T15.X, T12.Y, literal.x, 6747; EG-NEXT: MOV T15.Y, 0.0, 6748; EG-NEXT: LSHR 
T12.Z, T12.X, literal.y, 6749; EG-NEXT: AND_INT * T12.X, T12.X, literal.x, 6750; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6751; EG-NEXT: MOV T12.Y, 0.0, 6752; EG-NEXT: LSHR * T16.Z, T11.W, literal.x, 6753; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6754; EG-NEXT: AND_INT T16.X, T11.W, literal.x, 6755; EG-NEXT: MOV T16.Y, 0.0, 6756; EG-NEXT: LSHR T17.Z, T11.Z, literal.y, 6757; EG-NEXT: AND_INT * T17.X, T11.Z, literal.x, 6758; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6759; EG-NEXT: MOV T17.Y, 0.0, 6760; EG-NEXT: LSHR * T18.Z, T11.Y, literal.x, 6761; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6762; EG-NEXT: AND_INT T18.X, T11.Y, literal.x, 6763; EG-NEXT: MOV T18.Y, 0.0, 6764; EG-NEXT: LSHR T11.Z, T11.X, literal.y, 6765; EG-NEXT: AND_INT * T11.X, T11.X, literal.x, 6766; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6767; EG-NEXT: MOV T11.Y, 0.0, 6768; EG-NEXT: MOV T13.W, 0.0, 6769; EG-NEXT: MOV * T14.W, 0.0, 6770; EG-NEXT: MOV T15.W, 0.0, 6771; EG-NEXT: MOV * T12.W, 0.0, 6772; EG-NEXT: MOV T16.W, 0.0, 6773; EG-NEXT: MOV * T17.W, 0.0, 6774; EG-NEXT: MOV T18.W, 0.0, 6775; EG-NEXT: MOV * T11.W, 0.0, 6776; EG-NEXT: LSHR T19.X, KC0[2].Y, literal.x, 6777; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6778; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6779; EG-NEXT: LSHR T20.X, PV.W, literal.x, 6780; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6781; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 6782; EG-NEXT: LSHR T21.X, PV.W, literal.x, 6783; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6784; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 6785; EG-NEXT: LSHR T22.X, PV.W, literal.x, 6786; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6787; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 6788; EG-NEXT: LSHR T23.X, PV.W, literal.x, 6789; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6790; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 6791; EG-NEXT: LSHR T24.X, PV.W, literal.x, 6792; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6793; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 
6794; EG-NEXT: LSHR T25.X, PV.W, literal.x, 6795; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6796; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) 6797; EG-NEXT: LSHR * T26.X, PV.W, literal.x, 6798; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 6799; 6800; CM-LABEL: global_zextload_v16i16_to_v16i64: 6801; CM: ; %bb.0: 6802; CM-NEXT: ALU 0, @16, KC0[CB0:0-32], KC1[] 6803; CM-NEXT: TEX 1 @12 6804; CM-NEXT: ALU 64, @17, KC0[CB0:0-32], KC1[] 6805; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T13, T26.X 6806; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T14, T25.X 6807; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T15, T24.X 6808; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T12, T23.X 6809; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T16, T22.X 6810; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T17, T21.X 6811; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T18, T20.X 6812; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T11, T19.X 6813; CM-NEXT: CF_END 6814; CM-NEXT: Fetch clause starting at 12: 6815; CM-NEXT: VTX_READ_128 T12.XYZW, T11.X, 0, #1 6816; CM-NEXT: VTX_READ_128 T11.XYZW, T11.X, 16, #1 6817; CM-NEXT: ALU clause starting at 16: 6818; CM-NEXT: MOV * T11.X, KC0[2].Z, 6819; CM-NEXT: ALU clause starting at 17: 6820; CM-NEXT: LSHR * T13.Z, T12.X, literal.x, 6821; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 6822; CM-NEXT: AND_INT T13.X, T12.X, literal.x, 6823; CM-NEXT: MOV T13.Y, 0.0, 6824; CM-NEXT: LSHR * T14.Z, T12.Y, literal.y, 6825; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6826; CM-NEXT: AND_INT T14.X, T12.Y, literal.x, 6827; CM-NEXT: MOV T14.Y, 0.0, 6828; CM-NEXT: LSHR * T15.Z, T12.Z, literal.y, 6829; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6830; CM-NEXT: AND_INT T15.X, T12.Z, literal.x, 6831; CM-NEXT: MOV T15.Y, 0.0, 6832; CM-NEXT: LSHR * T12.Z, T12.W, literal.y, 6833; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6834; CM-NEXT: AND_INT T12.X, T12.W, literal.x, 6835; CM-NEXT: MOV T12.Y, 0.0, 6836; CM-NEXT: LSHR * T16.Z, T11.X, literal.y, 6837; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6838; 
CM-NEXT: AND_INT T16.X, T11.X, literal.x, 6839; CM-NEXT: MOV T16.Y, 0.0, 6840; CM-NEXT: LSHR * T17.Z, T11.Y, literal.y, 6841; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6842; CM-NEXT: AND_INT T17.X, T11.Y, literal.x, 6843; CM-NEXT: MOV T17.Y, 0.0, 6844; CM-NEXT: LSHR * T18.Z, T11.Z, literal.y, 6845; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6846; CM-NEXT: AND_INT T18.X, T11.Z, literal.x, 6847; CM-NEXT: MOV T18.Y, 0.0, 6848; CM-NEXT: LSHR * T11.Z, T11.W, literal.y, 6849; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 6850; CM-NEXT: AND_INT T11.X, T11.W, literal.x, 6851; CM-NEXT: MOV T11.Y, 0.0, 6852; CM-NEXT: MOV * T13.W, 0.0, 6853; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 6854; CM-NEXT: MOV * T14.W, 0.0, 6855; CM-NEXT: MOV * T15.W, 0.0, 6856; CM-NEXT: MOV * T12.W, 0.0, 6857; CM-NEXT: MOV * T16.W, 0.0, 6858; CM-NEXT: MOV * T17.W, 0.0, 6859; CM-NEXT: MOV * T18.W, 0.0, 6860; CM-NEXT: MOV * T11.W, 0.0, 6861; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 6862; CM-NEXT: 112(1.569454e-43), 0(0.000000e+00) 6863; CM-NEXT: LSHR T19.X, PV.W, literal.x, 6864; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6865; CM-NEXT: 2(2.802597e-45), 96(1.345247e-43) 6866; CM-NEXT: LSHR T20.X, PV.W, literal.x, 6867; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6868; CM-NEXT: 2(2.802597e-45), 80(1.121039e-43) 6869; CM-NEXT: LSHR T21.X, PV.W, literal.x, 6870; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6871; CM-NEXT: 2(2.802597e-45), 64(8.968310e-44) 6872; CM-NEXT: LSHR T22.X, PV.W, literal.x, 6873; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6874; CM-NEXT: 2(2.802597e-45), 48(6.726233e-44) 6875; CM-NEXT: LSHR T23.X, PV.W, literal.x, 6876; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6877; CM-NEXT: 2(2.802597e-45), 32(4.484155e-44) 6878; CM-NEXT: LSHR T24.X, PV.W, literal.x, 6879; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 6880; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 6881; CM-NEXT: LSHR * T25.X, PV.W, literal.x, 6882; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 6883; 
CM-NEXT: LSHR * T26.X, KC0[2].Y, literal.x, 6884; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 6885 %load = load <16 x i16>, <16 x i16> addrspace(1)* %in 6886 %ext = zext <16 x i16> %load to <16 x i64> 6887 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out 6888 ret void 6889} 6890 6891define amdgpu_kernel void @global_sextload_v16i16_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i16> addrspace(1)* %in) #0 { 6892; GCN-NOHSA-SI-LABEL: global_sextload_v16i16_to_v16i64: 6893; GCN-NOHSA-SI: ; %bb.0: 6894; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 6895; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 6896; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 6897; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2 6898; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3 6899; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 6900; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6 6901; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7 6902; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 6903; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 6904; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4 6905; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5 6906; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 6907; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v8, v7 6908; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v12, v3 6909; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v13, 16, v4 6910; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v14, 16, v2 6911; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v15, 16, v0 6912; GCN-NOHSA-SI-NEXT: v_bfe_i32 v8, v8, 0, 16 6913; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[10:11], v[6:7], 48 6914; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v9, 31, v8 6915; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:112 6916; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6917; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[9:10], v[4:5], 48 6918; GCN-NOHSA-SI-NEXT: v_bfe_i32 v7, v5, 0, 16 6919; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v8, 31, v7 6920; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[7:10], off, s[0:3], 0 offset:80 6921; GCN-NOHSA-SI-NEXT: v_bfe_i32 v5, v0, 0, 16 6922; GCN-NOHSA-SI-NEXT: 
s_waitcnt expcnt(0) 6923; GCN-NOHSA-SI-NEXT: v_bfe_i32 v7, v15, 0, 16 6924; GCN-NOHSA-SI-NEXT: v_bfe_i32 v8, v12, 0, 16 6925; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[10:11], v[2:3], 48 6926; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v9, 31, v8 6927; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:48 6928; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 6929; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[11:12], v[0:1], 48 6930; GCN-NOHSA-SI-NEXT: v_bfe_i32 v9, v1, 0, 16 6931; GCN-NOHSA-SI-NEXT: v_bfe_i32 v0, v2, 0, 16 6932; GCN-NOHSA-SI-NEXT: v_bfe_i32 v2, v14, 0, 16 6933; GCN-NOHSA-SI-NEXT: v_bfe_i32 v15, v13, 0, 16 6934; GCN-NOHSA-SI-NEXT: v_bfe_i32 v13, v4, 0, 16 6935; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v1, 16, v6 6936; GCN-NOHSA-SI-NEXT: v_bfe_i32 v17, v6, 0, 16 6937; GCN-NOHSA-SI-NEXT: v_bfe_i32 v19, v1, 0, 16 6938; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v6, 31, v5 6939; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 6940; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v14, 31, v13 6941; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v18, 31, v17 6942; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v10, 31, v9 6943; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v8, 31, v7 6944; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v3, 31, v2 6945; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v16, 31, v15 6946; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v20, 31, v19 6947; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:16 6948; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[17:20], off, s[0:3], 0 offset:96 6949; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:64 6950; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 6951; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[5:8], off, s[0:3], 0 6952; GCN-NOHSA-SI-NEXT: s_endpgm 6953; 6954; GCN-HSA-LABEL: global_sextload_v16i16_to_v16i64: 6955; GCN-HSA: ; %bb.0: 6956; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 6957; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 6958; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 6959; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 
6960; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 6961; GCN-HSA-NEXT: s_add_u32 s2, s2, 16 6962; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0 6963; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 6964; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 6965; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] 6966; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 6967; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6968; GCN-HSA-NEXT: v_mov_b32_e32 v15, s3 6969; GCN-HSA-NEXT: v_mov_b32_e32 v14, s2 6970; GCN-HSA-NEXT: s_add_u32 s2, s0, 16 6971; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6972; GCN-HSA-NEXT: v_mov_b32_e32 v17, s3 6973; GCN-HSA-NEXT: v_mov_b32_e32 v16, s2 6974; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x70 6975; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6976; GCN-HSA-NEXT: v_mov_b32_e32 v19, s3 6977; GCN-HSA-NEXT: v_mov_b32_e32 v18, s2 6978; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x50 6979; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6980; GCN-HSA-NEXT: v_mov_b32_e32 v13, s1 6981; GCN-HSA-NEXT: v_mov_b32_e32 v12, s0 6982; GCN-HSA-NEXT: s_waitcnt vmcnt(1) 6983; GCN-HSA-NEXT: v_ashr_i64 v[10:11], v[0:1], 48 6984; GCN-HSA-NEXT: v_bfe_i32 v8, v1, 0, 16 6985; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v8 6986; GCN-HSA-NEXT: v_mov_b32_e32 v1, v3 6987; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[8:11] 6988; GCN-HSA-NEXT: v_mov_b32_e32 v17, s3 6989; GCN-HSA-NEXT: v_bfe_i32 v8, v1, 0, 16 6990; GCN-HSA-NEXT: v_ashr_i64 v[10:11], v[2:3], 48 6991; GCN-HSA-NEXT: v_mov_b32_e32 v16, s2 6992; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 6993; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v8 6994; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 6995; GCN-HSA-NEXT: flat_store_dwordx4 v[14:15], v[8:11] 6996; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v2 6997; GCN-HSA-NEXT: v_bfe_i32 v8, v2, 0, 16 6998; GCN-HSA-NEXT: v_lshrrev_b32_e32 v2, 16, v0 6999; GCN-HSA-NEXT: v_mov_b32_e32 v15, s3 7000; GCN-HSA-NEXT: v_bfe_i32 v0, v0, 0, 16 7001; GCN-HSA-NEXT: v_bfe_i32 v2, v2, 0, 16 7002; GCN-HSA-NEXT: v_mov_b32_e32 v14, s2 7003; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60 7004; GCN-HSA-NEXT: v_bfe_i32 v10, v1, 0, 16 7005; 
GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 7006; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 31, v2 7007; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 7008; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v8 7009; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 31, v10 7010; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[0:3] 7011; GCN-HSA-NEXT: flat_store_dwordx4 v[14:15], v[8:11] 7012; GCN-HSA-NEXT: s_waitcnt vmcnt(4) 7013; GCN-HSA-NEXT: v_ashr_i64 v[2:3], v[4:5], 48 7014; GCN-HSA-NEXT: v_bfe_i32 v0, v5, 0, 16 7015; GCN-HSA-NEXT: s_add_u32 s0, s0, 64 7016; GCN-HSA-NEXT: v_mov_b32_e32 v11, v7 7017; GCN-HSA-NEXT: v_lshrrev_b32_e32 v8, 16, v6 7018; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 7019; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 7020; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[0:3] 7021; GCN-HSA-NEXT: v_bfe_i32 v10, v8, 0, 16 7022; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v4 7023; GCN-HSA-NEXT: v_bfe_i32 v0, v4, 0, 16 7024; GCN-HSA-NEXT: v_bfe_i32 v8, v6, 0, 16 7025; GCN-HSA-NEXT: v_ashr_i64 v[6:7], v[6:7], 48 7026; GCN-HSA-NEXT: v_bfe_i32 v4, v11, 0, 16 7027; GCN-HSA-NEXT: v_mov_b32_e32 v15, s3 7028; GCN-HSA-NEXT: v_mov_b32_e32 v21, s1 7029; GCN-HSA-NEXT: v_bfe_i32 v2, v1, 0, 16 7030; GCN-HSA-NEXT: v_ashrrev_i32_e32 v5, 31, v4 7031; GCN-HSA-NEXT: v_mov_b32_e32 v14, s2 7032; GCN-HSA-NEXT: v_mov_b32_e32 v20, s0 7033; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 7034; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v8 7035; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 31, v2 7036; GCN-HSA-NEXT: v_ashrrev_i32_e32 v11, 31, v10 7037; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[4:7] 7038; GCN-HSA-NEXT: flat_store_dwordx4 v[14:15], v[8:11] 7039; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[0:3] 7040; GCN-HSA-NEXT: s_endpgm 7041; 7042; GCN-NOHSA-VI-LABEL: global_sextload_v16i16_to_v16i64: 7043; GCN-NOHSA-VI: ; %bb.0: 7044; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 7045; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 7046; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 7047; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 7048; 
GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 7049; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 7050; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 7051; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 7052; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 7053; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 7054; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 7055; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 7056; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1) 7057; GCN-NOHSA-VI-NEXT: v_bfe_i32 v8, v0, 0, 16 7058; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 7059; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v9, 16, v5 7060; GCN-NOHSA-VI-NEXT: v_bfe_i32 v11, v9, 0, 16 7061; GCN-NOHSA-VI-NEXT: v_bfe_i32 v9, v5, 0, 16 7062; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v10, 31, v9 7063; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v12, 31, v11 7064; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v5, 16, v4 7065; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:80 7066; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v13, v7 7067; GCN-NOHSA-VI-NEXT: v_bfe_i32 v11, v5, 0, 16 7068; GCN-NOHSA-VI-NEXT: v_bfe_i32 v9, v4, 0, 16 7069; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v10, 31, v9 7070; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v12, 31, v11 7071; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v4, 16, v7 7072; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:64 7073; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 7074; GCN-NOHSA-VI-NEXT: v_bfe_i32 v9, v13, 0, 16 7075; GCN-NOHSA-VI-NEXT: v_bfe_i32 v11, v4, 0, 16 7076; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v10, 31, v9 7077; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v12, 31, v11 7078; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:112 7079; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v4, 16, v2 7080; GCN-NOHSA-VI-NEXT: v_bfe_i32 v12, v1, 0, 16 7081; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 7082; GCN-NOHSA-VI-NEXT: v_bfe_i32 v14, v1, 0, 16 7083; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, v3 7084; GCN-NOHSA-VI-NEXT: v_bfe_i32 v16, v1, 0, 16 7085; GCN-NOHSA-VI-NEXT: 
v_lshrrev_b32_e32 v1, 16, v6 7086; GCN-NOHSA-VI-NEXT: v_bfe_i32 v10, v0, 0, 16 7087; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v3 7088; GCN-NOHSA-VI-NEXT: v_bfe_i32 v0, v2, 0, 16 7089; GCN-NOHSA-VI-NEXT: v_bfe_i32 v2, v4, 0, 16 7090; GCN-NOHSA-VI-NEXT: v_bfe_i32 v4, v6, 0, 16 7091; GCN-NOHSA-VI-NEXT: v_bfe_i32 v6, v1, 0, 16 7092; GCN-NOHSA-VI-NEXT: v_bfe_i32 v18, v3, 0, 16 7093; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v5, 31, v4 7094; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v7, 31, v6 7095; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v9, 31, v8 7096; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v13, 31, v12 7097; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0 7098; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v11, 31, v10 7099; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v15, 31, v14 7100; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v3, 31, v2 7101; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v17, 31, v16 7102; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v19, 31, v18 7103; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:96 7104; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:48 7105; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:32 7106; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:16 7107; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 7108; GCN-NOHSA-VI-NEXT: s_endpgm 7109; 7110; EG-LABEL: global_sextload_v16i16_to_v16i64: 7111; EG: ; %bb.0: 7112; EG-NEXT: ALU 0, @16, KC0[CB0:0-32], KC1[] 7113; EG-NEXT: TEX 1 @12 7114; EG-NEXT: ALU 65, @17, KC0[CB0:0-32], KC1[] 7115; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T12.X, 0 7116; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T20.X, 0 7117; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T11.XYZW, T18.X, 0 7118; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T17.X, 0 7119; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T16.X, 0 7120; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T15.X, 0 7121; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T14.X, 0 7122; EG-NEXT: 
MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T13.X, 1 7123; EG-NEXT: CF_END 7124; EG-NEXT: Fetch clause starting at 12: 7125; EG-NEXT: VTX_READ_128 T12.XYZW, T11.X, 16, #1 7126; EG-NEXT: VTX_READ_128 T11.XYZW, T11.X, 0, #1 7127; EG-NEXT: ALU clause starting at 16: 7128; EG-NEXT: MOV * T11.X, KC0[2].Z, 7129; EG-NEXT: ALU clause starting at 17: 7130; EG-NEXT: LSHR T13.X, KC0[2].Y, literal.x, 7131; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7132; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 7133; EG-NEXT: LSHR T14.X, PV.W, literal.x, 7134; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7135; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 7136; EG-NEXT: LSHR T15.X, PV.W, literal.x, 7137; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7138; EG-NEXT: 2(2.802597e-45), 48(6.726233e-44) 7139; EG-NEXT: LSHR T16.X, PV.W, literal.x, 7140; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7141; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 7142; EG-NEXT: LSHR T17.X, PV.W, literal.x, 7143; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7144; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 7145; EG-NEXT: LSHR T18.X, PV.W, literal.x, 7146; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.y, 7147; EG-NEXT: ASHR * T19.W, T11.X, literal.z, 7148; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 7149; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 7150; EG-NEXT: LSHR T20.X, PV.W, literal.x, 7151; EG-NEXT: ASHR T19.Z, T11.X, literal.y, 7152; EG-NEXT: ASHR * T21.W, T11.Y, literal.z, 7153; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 7154; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 7155; EG-NEXT: BFE_INT T19.X, T11.X, 0.0, literal.x, 7156; EG-NEXT: ASHR T21.Z, T11.Y, literal.x, 7157; EG-NEXT: ASHR * T22.W, T11.Z, literal.y, 7158; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7159; EG-NEXT: BFE_INT T21.X, T11.Y, 0.0, literal.x, 7160; EG-NEXT: ASHR T19.Y, PV.X, literal.y, 7161; EG-NEXT: ASHR T22.Z, T11.Z, literal.x, 7162; EG-NEXT: ASHR * T23.W, T11.W, literal.y, 7163; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7164; EG-NEXT: BFE_INT 
T22.X, T11.Z, 0.0, literal.x, 7165; EG-NEXT: ASHR T21.Y, PV.X, literal.y, 7166; EG-NEXT: ASHR T23.Z, T11.W, literal.x, 7167; EG-NEXT: ASHR * T24.W, T12.X, literal.y, 7168; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7169; EG-NEXT: BFE_INT T23.X, T11.W, 0.0, literal.x, 7170; EG-NEXT: ASHR T22.Y, PV.X, literal.y, 7171; EG-NEXT: ASHR T24.Z, T12.X, literal.x, 7172; EG-NEXT: ASHR * T11.W, T12.Y, literal.y, 7173; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7174; EG-NEXT: BFE_INT T24.X, T12.X, 0.0, literal.x, 7175; EG-NEXT: ASHR T23.Y, PV.X, literal.y, 7176; EG-NEXT: ASHR T11.Z, T12.Y, literal.x, 7177; EG-NEXT: ASHR * T25.W, T12.Z, literal.y, 7178; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7179; EG-NEXT: BFE_INT T11.X, T12.Y, 0.0, literal.x, 7180; EG-NEXT: ASHR T24.Y, PV.X, literal.y, 7181; EG-NEXT: ASHR T25.Z, T12.Z, literal.x, 7182; EG-NEXT: ASHR * T26.W, T12.W, literal.y, 7183; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7184; EG-NEXT: BFE_INT T25.X, T12.Z, 0.0, literal.x, 7185; EG-NEXT: ASHR T11.Y, PV.X, literal.y, 7186; EG-NEXT: ASHR * T26.Z, T12.W, literal.x, 7187; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7188; EG-NEXT: BFE_INT T26.X, T12.W, 0.0, literal.x, 7189; EG-NEXT: ASHR T25.Y, PV.X, literal.y, 7190; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 7191; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7192; EG-NEXT: 112(1.569454e-43), 0(0.000000e+00) 7193; EG-NEXT: LSHR T12.X, PV.W, literal.x, 7194; EG-NEXT: ASHR * T26.Y, PV.X, literal.y, 7195; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) 7196; 7197; CM-LABEL: global_sextload_v16i16_to_v16i64: 7198; CM: ; %bb.0: 7199; CM-NEXT: ALU 0, @16, KC0[CB0:0-32], KC1[] 7200; CM-NEXT: TEX 1 @12 7201; CM-NEXT: ALU 65, @17, KC0[CB0:0-32], KC1[] 7202; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T12, T26.X 7203; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T25, T20.X 7204; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T24, T18.X 7205; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T23, T17.X 7206; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T11, T16.X 7207; 
CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T22, T15.X 7208; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T21, T14.X 7209; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T19, T13.X 7210; CM-NEXT: CF_END 7211; CM-NEXT: Fetch clause starting at 12: 7212; CM-NEXT: VTX_READ_128 T12.XYZW, T11.X, 0, #1 7213; CM-NEXT: VTX_READ_128 T11.XYZW, T11.X, 16, #1 7214; CM-NEXT: ALU clause starting at 16: 7215; CM-NEXT: MOV * T11.X, KC0[2].Z, 7216; CM-NEXT: ALU clause starting at 17: 7217; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 7218; CM-NEXT: 112(1.569454e-43), 0(0.000000e+00) 7219; CM-NEXT: LSHR T13.X, PV.W, literal.x, 7220; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7221; CM-NEXT: 2(2.802597e-45), 96(1.345247e-43) 7222; CM-NEXT: LSHR T14.X, PV.W, literal.x, 7223; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7224; CM-NEXT: 2(2.802597e-45), 80(1.121039e-43) 7225; CM-NEXT: LSHR T15.X, PV.W, literal.x, 7226; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7227; CM-NEXT: 2(2.802597e-45), 64(8.968310e-44) 7228; CM-NEXT: LSHR T16.X, PV.W, literal.x, 7229; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7230; CM-NEXT: 2(2.802597e-45), 48(6.726233e-44) 7231; CM-NEXT: LSHR T17.X, PV.W, literal.x, 7232; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7233; CM-NEXT: 2(2.802597e-45), 32(4.484155e-44) 7234; CM-NEXT: LSHR T18.X, PV.W, literal.x, 7235; CM-NEXT: ADD_INT T0.Z, KC0[2].Y, literal.y, 7236; CM-NEXT: ASHR * T19.W, T11.W, literal.z, 7237; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 7238; CM-NEXT: 31(4.344025e-44), 0(0.000000e+00) 7239; CM-NEXT: LSHR T20.X, PV.Z, literal.x, 7240; CM-NEXT: ASHR T19.Z, T11.W, literal.y, 7241; CM-NEXT: ASHR * T21.W, T11.Z, literal.z, 7242; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 7243; CM-NEXT: 31(4.344025e-44), 0(0.000000e+00) 7244; CM-NEXT: BFE_INT T19.X, T11.W, 0.0, literal.x, 7245; CM-NEXT: ASHR T21.Z, T11.Z, literal.x, 7246; CM-NEXT: ASHR * T22.W, T11.Y, literal.y, 7247; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7248; CM-NEXT: BFE_INT T21.X, T11.Z, 0.0, 
literal.x, 7249; CM-NEXT: ASHR T19.Y, PV.X, literal.y, 7250; CM-NEXT: ASHR T22.Z, T11.Y, literal.x, 7251; CM-NEXT: ASHR * T11.W, T11.X, literal.y, 7252; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7253; CM-NEXT: BFE_INT T22.X, T11.Y, 0.0, literal.x, 7254; CM-NEXT: ASHR T21.Y, PV.X, literal.y, 7255; CM-NEXT: ASHR T11.Z, T11.X, literal.x, 7256; CM-NEXT: ASHR * T23.W, T12.W, literal.y, 7257; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7258; CM-NEXT: BFE_INT T11.X, T11.X, 0.0, literal.x, 7259; CM-NEXT: ASHR T22.Y, PV.X, literal.y, 7260; CM-NEXT: ASHR T23.Z, T12.W, literal.x, 7261; CM-NEXT: ASHR * T24.W, T12.Z, literal.y, 7262; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7263; CM-NEXT: BFE_INT T23.X, T12.W, 0.0, literal.x, 7264; CM-NEXT: ASHR T11.Y, PV.X, literal.y, 7265; CM-NEXT: ASHR T24.Z, T12.Z, literal.x, 7266; CM-NEXT: ASHR * T25.W, T12.Y, literal.y, 7267; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7268; CM-NEXT: BFE_INT T24.X, T12.Z, 0.0, literal.x, 7269; CM-NEXT: ASHR T23.Y, PV.X, literal.y, 7270; CM-NEXT: ASHR T25.Z, T12.Y, literal.x, 7271; CM-NEXT: ASHR * T12.W, T12.X, literal.y, 7272; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7273; CM-NEXT: BFE_INT T25.X, T12.Y, 0.0, literal.x, 7274; CM-NEXT: ASHR T24.Y, PV.X, literal.y, 7275; CM-NEXT: ASHR * T12.Z, T12.X, literal.x, 7276; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7277; CM-NEXT: BFE_INT T12.X, T12.X, 0.0, literal.x, 7278; CM-NEXT: ASHR * T25.Y, PV.X, literal.y, 7279; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 7280; CM-NEXT: LSHR T26.X, KC0[2].Y, literal.x, 7281; CM-NEXT: ASHR * T12.Y, PV.X, literal.y, 7282; CM-NEXT: 2(2.802597e-45), 31(4.344025e-44) 7283 %load = load <16 x i16>, <16 x i16> addrspace(1)* %in 7284 %ext = sext <16 x i16> %load to <16 x i64> 7285 store <16 x i64> %ext, <16 x i64> addrspace(1)* %out 7286 ret void 7287} 7288 7289define amdgpu_kernel void @global_zextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 { 7290; GCN-NOHSA-SI-LABEL: 
global_zextload_v32i16_to_v32i64: 7291; GCN-NOHSA-SI: ; %bb.0: 7292; GCN-NOHSA-SI-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0 7293; GCN-NOHSA-SI-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1 7294; GCN-NOHSA-SI-NEXT: s_mov_b32 s14, -1 7295; GCN-NOHSA-SI-NEXT: s_mov_b32 s15, 0xe8f000 7296; GCN-NOHSA-SI-NEXT: s_add_u32 s12, s12, s3 7297; GCN-NOHSA-SI-NEXT: s_addc_u32 s13, s13, 0 7298; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 7299; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 7300; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 7301; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, 0 7302; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2 7303; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3 7304; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 7305; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6 7306; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7 7307; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[2:5], off, s[8:11], 0 7308; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[6:9], off, s[8:11], 0 offset:16 7309; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[10:13], off, s[8:11], 0 offset:32 7310; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[14:17], off, s[8:11], 0 offset:48 7311; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(3) 7312; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v23, 16, v3 7313; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v21, 16, v4 7314; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v20, 16, v2 7315; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v18, 0xffff, v2 7316; GCN-NOHSA-SI-NEXT: buffer_store_dword v18, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill 7317; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 7318; GCN-NOHSA-SI-NEXT: buffer_store_dword v19, off, s[12:15], 0 offset:8 ; 4-byte Folded Spill 7319; GCN-NOHSA-SI-NEXT: buffer_store_dword v20, off, s[12:15], 0 offset:12 ; 4-byte Folded Spill 7320; GCN-NOHSA-SI-NEXT: buffer_store_dword v21, off, s[12:15], 0 offset:16 ; 4-byte Folded Spill 7321; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(2) 7322; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v19, 0xffff, v4 7323; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 7324; GCN-NOHSA-SI-NEXT: buffer_store_dword v19, off, s[12:15], 0 
offset:20 ; 4-byte Folded Spill 7325; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 7326; GCN-NOHSA-SI-NEXT: buffer_store_dword v20, off, s[12:15], 0 offset:24 ; 4-byte Folded Spill 7327; GCN-NOHSA-SI-NEXT: buffer_store_dword v21, off, s[12:15], 0 offset:28 ; 4-byte Folded Spill 7328; GCN-NOHSA-SI-NEXT: buffer_store_dword v22, off, s[12:15], 0 offset:32 ; 4-byte Folded Spill 7329; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v4, 0xffff, v3 7330; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v32, 16, v5 7331; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v30, 0xffff, v5 7332; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v36, 16, v6 7333; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v34, 0xffff, v6 7334; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v28, 16, v8 7335; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v26, 0xffff, v8 7336; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v40, 16, v7 7337; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v38, 0xffff, v7 7338; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v44, 16, v9 7339; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v42, 0xffff, v9 7340; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v48, 16, v10 7341; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v46, 0xffff, v10 7342; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v24, 16, v12 7343; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 7344; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v22, 0xffff, v12 7345; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v52, 16, v11 7346; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v50, 0xffff, v11 7347; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v56, 16, v13 7348; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v54, 0xffff, v13 7349; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v2, 16, v17 7350; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v60, 16, v14 7351; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v58, 0xffff, v14 7352; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v20, 16, v16 7353; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v18, 0xffff, v16 7354; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v10, 16, v15 7355; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v8, 0xffff, v15 7356; GCN-NOHSA-SI-NEXT: v_and_b32_e32 v0, 0xffff, v17 7357; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, v1 7358; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 
v9, v1 7359; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v11, v1 7360; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v55, v1 7361; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v57, v1 7362; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v51, v1 7363; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v53, v1 7364; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v43, v1 7365; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v45, v1 7366; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v39, v1 7367; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v41, v1 7368; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v31, v1 7369; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v33, v1 7370; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v6, v23 7371; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, v1 7372; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, v1 7373; GCN-NOHSA-SI-NEXT: buffer_store_dword v4, off, s[12:15], 0 offset:36 ; 4-byte Folded Spill 7374; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 7375; GCN-NOHSA-SI-NEXT: buffer_store_dword v5, off, s[12:15], 0 offset:40 ; 4-byte Folded Spill 7376; GCN-NOHSA-SI-NEXT: buffer_store_dword v6, off, s[12:15], 0 offset:44 ; 4-byte Folded Spill 7377; GCN-NOHSA-SI-NEXT: buffer_store_dword v7, off, s[12:15], 0 offset:48 ; 4-byte Folded Spill 7378; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v19, v1 7379; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v59, v1 7380; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v23, v1 7381; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v47, v1 7382; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v27, v1 7383; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v35, v1 7384; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 7385; GCN-NOHSA-SI-NEXT: buffer_load_dword v4, off, s[12:15], 0 offset:20 ; 4-byte Folded Reload 7386; GCN-NOHSA-SI-NEXT: buffer_load_dword v5, off, s[12:15], 0 offset:24 ; 4-byte Folded Reload 7387; GCN-NOHSA-SI-NEXT: buffer_load_dword v6, off, s[12:15], 0 offset:28 ; 4-byte Folded Reload 7388; GCN-NOHSA-SI-NEXT: buffer_load_dword v7, off, s[12:15], 0 offset:32 ; 4-byte Folded Reload 7389; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 7390; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v5, v1 7391; GCN-NOHSA-SI-NEXT: buffer_load_dword v12, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload 
7392; GCN-NOHSA-SI-NEXT: buffer_load_dword v13, off, s[12:15], 0 offset:8 ; 4-byte Folded Reload 7393; GCN-NOHSA-SI-NEXT: buffer_load_dword v14, off, s[12:15], 0 offset:12 ; 4-byte Folded Reload 7394; GCN-NOHSA-SI-NEXT: buffer_load_dword v15, off, s[12:15], 0 offset:16 ; 4-byte Folded Reload 7395; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 7396; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, v1 7397; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4 7398; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5 7399; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:240 7400; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v21, 0 7401; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v61, 0 7402; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v25, 0 7403; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 7404; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v0, v12 7405; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, v13 7406; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v2, v14 7407; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v3, 0 7408; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v7, 0 7409; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v37, 0 7410; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v29, 0 7411; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v49, 0 7412; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:208 7413; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[54:57], off, s[0:3], 0 offset:176 7414; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[50:53], off, s[0:3], 0 offset:144 7415; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[42:45], off, s[0:3], 0 offset:112 7416; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[38:41], off, s[0:3], 0 offset:80 7417; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[30:33], off, s[0:3], 0 offset:48 7418; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(5) 7419; GCN-NOHSA-SI-NEXT: buffer_load_dword v8, off, s[12:15], 0 offset:36 ; 4-byte Folded Reload 7420; GCN-NOHSA-SI-NEXT: buffer_load_dword v9, off, s[12:15], 0 offset:40 ; 4-byte Folded Reload 7421; GCN-NOHSA-SI-NEXT: buffer_load_dword v10, off, s[12:15], 0 offset:44 ; 4-byte Folded Reload 7422; GCN-NOHSA-SI-NEXT: buffer_load_dword v11, off, s[12:15], 0 
offset:48 ; 4-byte Folded Reload 7423; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(0) 7424; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16 7425; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[18:21], off, s[0:3], 0 offset:224 7426; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[58:61], off, s[0:3], 0 offset:192 7427; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[22:25], off, s[0:3], 0 offset:160 7428; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[46:49], off, s[0:3], 0 offset:128 7429; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[26:29], off, s[0:3], 0 offset:96 7430; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[34:37], off, s[0:3], 0 offset:64 7431; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:32 7432; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 7433; GCN-NOHSA-SI-NEXT: s_endpgm 7434; 7435; GCN-HSA-LABEL: global_zextload_v32i16_to_v32i64: 7436; GCN-HSA: ; %bb.0: 7437; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 7438; GCN-HSA-NEXT: v_mov_b32_e32 v4, 0 7439; GCN-HSA-NEXT: v_mov_b32_e32 v6, v4 7440; GCN-HSA-NEXT: v_mov_b32_e32 v8, v4 7441; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 7442; GCN-HSA-NEXT: s_add_u32 s4, s2, 16 7443; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 7444; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 7445; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 7446; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 7447; GCN-HSA-NEXT: s_add_u32 s4, s2, 32 7448; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 7449; GCN-HSA-NEXT: v_mov_b32_e32 v10, s5 7450; GCN-HSA-NEXT: v_mov_b32_e32 v9, s4 7451; GCN-HSA-NEXT: flat_load_dwordx4 v[9:12], v[9:10] 7452; GCN-HSA-NEXT: v_mov_b32_e32 v14, s3 7453; GCN-HSA-NEXT: v_mov_b32_e32 v13, s2 7454; GCN-HSA-NEXT: s_add_u32 s2, s2, 48 7455; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0 7456; GCN-HSA-NEXT: v_mov_b32_e32 v18, s3 7457; GCN-HSA-NEXT: flat_load_dwordx4 v[13:16], v[13:14] 7458; GCN-HSA-NEXT: v_mov_b32_e32 v17, s2 7459; GCN-HSA-NEXT: flat_load_dwordx4 v[17:20], v[17:18] 7460; GCN-HSA-NEXT: s_add_u32 s4, s0, 48 7461; GCN-HSA-NEXT: 
s_addc_u32 s5, s1, 0 7462; GCN-HSA-NEXT: s_add_u32 s6, s0, 16 7463; GCN-HSA-NEXT: s_addc_u32 s7, s1, 0 7464; GCN-HSA-NEXT: s_add_u32 s8, s0, 0xf0 7465; GCN-HSA-NEXT: s_addc_u32 s9, s1, 0 7466; GCN-HSA-NEXT: s_add_u32 s10, s0, 0xd0 7467; GCN-HSA-NEXT: s_addc_u32 s11, s1, 0 7468; GCN-HSA-NEXT: s_add_u32 s12, s0, 0xb0 7469; GCN-HSA-NEXT: s_addc_u32 s13, s1, 0 7470; GCN-HSA-NEXT: s_add_u32 s14, s0, 0x90 7471; GCN-HSA-NEXT: s_addc_u32 s15, s1, 0 7472; GCN-HSA-NEXT: s_add_u32 s16, s0, 0x70 7473; GCN-HSA-NEXT: s_addc_u32 s17, s1, 0 7474; GCN-HSA-NEXT: v_mov_b32_e32 v22, s17 7475; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x50 7476; GCN-HSA-NEXT: v_mov_b32_e32 v21, s16 7477; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 7478; GCN-HSA-NEXT: s_waitcnt vmcnt(3) 7479; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v3 7480; GCN-HSA-NEXT: v_and_b32_e32 v5, 0xffff, v3 7481; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[5:8] 7482; GCN-HSA-NEXT: v_mov_b32_e32 v22, s3 7483; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v1 7484; GCN-HSA-NEXT: v_and_b32_e32 v5, 0xffff, v1 7485; GCN-HSA-NEXT: v_mov_b32_e32 v21, s2 7486; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[5:8] 7487; GCN-HSA-NEXT: v_mov_b32_e32 v22, s13 7488; GCN-HSA-NEXT: s_waitcnt vmcnt(4) 7489; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v12 7490; GCN-HSA-NEXT: v_and_b32_e32 v5, 0xffff, v12 7491; GCN-HSA-NEXT: v_mov_b32_e32 v21, s12 7492; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[5:8] 7493; GCN-HSA-NEXT: v_mov_b32_e32 v22, s15 7494; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v10 7495; GCN-HSA-NEXT: v_and_b32_e32 v5, 0xffff, v10 7496; GCN-HSA-NEXT: v_mov_b32_e32 v21, s14 7497; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[5:8] 7498; GCN-HSA-NEXT: v_mov_b32_e32 v22, s7 7499; GCN-HSA-NEXT: s_waitcnt vmcnt(5) 7500; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v14 7501; GCN-HSA-NEXT: v_and_b32_e32 v5, 0xffff, v14 7502; GCN-HSA-NEXT: v_mov_b32_e32 v21, s6 7503; GCN-HSA-NEXT: flat_store_dwordx4 v[21:22], v[5:8] 7504; GCN-HSA-NEXT: s_add_u32 s2, s0, 32 7505; GCN-HSA-NEXT: 
s_waitcnt vmcnt(5) 7506; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v20 7507; GCN-HSA-NEXT: v_and_b32_e32 v5, 0xffff, v20 7508; GCN-HSA-NEXT: v_mov_b32_e32 v21, s9 7509; GCN-HSA-NEXT: v_mov_b32_e32 v20, s8 7510; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[5:8] 7511; GCN-HSA-NEXT: v_mov_b32_e32 v21, s11 7512; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v18 7513; GCN-HSA-NEXT: v_and_b32_e32 v5, 0xffff, v18 7514; GCN-HSA-NEXT: v_mov_b32_e32 v20, s10 7515; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 7516; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[5:8] 7517; GCN-HSA-NEXT: v_and_b32_e32 v3, 0xffff, v16 7518; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v15 7519; GCN-HSA-NEXT: v_and_b32_e32 v5, 0xffff, v15 7520; GCN-HSA-NEXT: v_mov_b32_e32 v15, s3 7521; GCN-HSA-NEXT: v_mov_b32_e32 v8, 0 7522; GCN-HSA-NEXT: v_mov_b32_e32 v14, s2 7523; GCN-HSA-NEXT: flat_store_dwordx4 v[14:15], v[5:8] 7524; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xe0 7525; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v13 7526; GCN-HSA-NEXT: v_and_b32_e32 v5, 0xffff, v13 7527; GCN-HSA-NEXT: v_mov_b32_e32 v13, s1 7528; GCN-HSA-NEXT: v_mov_b32_e32 v8, 0 7529; GCN-HSA-NEXT: v_mov_b32_e32 v12, s0 7530; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 7531; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[5:8] 7532; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3 7533; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2 7534; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xc0 7535; GCN-HSA-NEXT: v_mov_b32_e32 v8, 0 7536; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v19 7537; GCN-HSA-NEXT: v_and_b32_e32 v5, 0xffff, v19 7538; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 7539; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[5:8] 7540; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3 7541; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2 7542; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xa0 7543; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v17 7544; GCN-HSA-NEXT: v_and_b32_e32 v5, 0xffff, v17 7545; GCN-HSA-NEXT: v_mov_b32_e32 v8, 0 7546; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 7547; GCN-HSA-NEXT: flat_store_dwordx4 v[12:13], v[5:8] 7548; GCN-HSA-NEXT: 
v_lshrrev_b32_e32 v19, 16, v9 7549; GCN-HSA-NEXT: v_lshrrev_b32_e32 v7, 16, v11 7550; GCN-HSA-NEXT: v_and_b32_e32 v5, 0xffff, v11 7551; GCN-HSA-NEXT: v_mov_b32_e32 v11, s3 7552; GCN-HSA-NEXT: v_mov_b32_e32 v8, 0 7553; GCN-HSA-NEXT: v_mov_b32_e32 v10, s2 7554; GCN-HSA-NEXT: flat_store_dwordx4 v[10:11], v[5:8] 7555; GCN-HSA-NEXT: v_lshrrev_b32_e32 v10, 16, v0 7556; GCN-HSA-NEXT: v_and_b32_e32 v8, 0xffff, v0 7557; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 7558; GCN-HSA-NEXT: v_lshrrev_b32_e32 v5, 16, v16 7559; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 7560; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x80 7561; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[3:6] 7562; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 7563; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 7564; GCN-HSA-NEXT: v_and_b32_e32 v17, 0xffff, v9 7565; GCN-HSA-NEXT: v_mov_b32_e32 v20, 0 7566; GCN-HSA-NEXT: v_mov_b32_e32 v18, v4 7567; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 7568; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60 7569; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[17:20] 7570; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 7571; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 7572; GCN-HSA-NEXT: v_lshrrev_b32_e32 v14, 16, v2 7573; GCN-HSA-NEXT: v_and_b32_e32 v12, 0xffff, v2 7574; GCN-HSA-NEXT: v_mov_b32_e32 v15, 0 7575; GCN-HSA-NEXT: v_mov_b32_e32 v13, v4 7576; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 7577; GCN-HSA-NEXT: s_add_u32 s0, s0, 64 7578; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[12:15] 7579; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 7580; GCN-HSA-NEXT: v_mov_b32_e32 v0, s0 7581; GCN-HSA-NEXT: v_mov_b32_e32 v11, 0 7582; GCN-HSA-NEXT: v_mov_b32_e32 v9, v4 7583; GCN-HSA-NEXT: v_mov_b32_e32 v1, s1 7584; GCN-HSA-NEXT: flat_store_dwordx4 v[0:1], v[8:11] 7585; GCN-HSA-NEXT: s_endpgm 7586; 7587; GCN-NOHSA-VI-LABEL: global_zextload_v32i16_to_v32i64: 7588; GCN-NOHSA-VI: ; %bb.0: 7589; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 7590; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 7591; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 7592; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 7593; GCN-NOHSA-VI-NEXT: 
s_mov_b32 s11, s3 7594; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 7595; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 7596; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 7597; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 7598; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 7599; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[30:33], off, s[8:11], 0 offset:32 7600; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[34:37], off, s[8:11], 0 offset:48 7601; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v57, 0 7602; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 7603; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 7604; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v55, 0 7605; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v53, v57 7606; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v46, v57 7607; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v48, v57 7608; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v28, v57 7609; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v9, v57 7610; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v11, v57 7611; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v19, 0 7612; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v15, 0 7613; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v50, v57 7614; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v13, v57 7615; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v17, v57 7616; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v41, 0 7617; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v23, 0 7618; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v43, v57 7619; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v39, v57 7620; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v59, v57 7621; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v25, v57 7622; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v21, v57 7623; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(3) 7624; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v10, 16, v1 7625; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(1) 7626; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v40, 16, v30 7627; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v38, 0xffff, v30 7628; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v44, 16, v32 7629; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v42, 0xffff, v32 7630; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v32, 16, v31 7631; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v30, 0xffff, v31 7632; GCN-NOHSA-VI-NEXT: 
v_lshrrev_b32_e32 v47, 16, v33 7633; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v45, 0xffff, v33 7634; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(0) 7635; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v54, 16, v36 7636; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v52, 0xffff, v36 7637; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v31, v57 7638; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v33, v57 7639; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v14, 16, v2 7640; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v18, 16, v0 7641; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v16, 0xffff, v0 7642; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v12, 0xffff, v2 7643; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v8, 0xffff, v1 7644; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v2, 16, v3 7645; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v0, 0xffff, v3 7646; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v22, 16, v4 7647; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v20, 0xffff, v4 7648; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v26, 16, v6 7649; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v24, 0xffff, v6 7650; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v6, 16, v5 7651; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v4, 0xffff, v5 7652; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v29, 16, v7 7653; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v27, 0xffff, v7 7654; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v51, 16, v34 7655; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v49, 0xffff, v34 7656; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v36, 16, v35 7657; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v34, 0xffff, v35 7658; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v58, 16, v37 7659; GCN-NOHSA-VI-NEXT: v_and_b32_e32 v56, 0xffff, v37 7660; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v35, v57 7661; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v37, v57 7662; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[30:33], off, s[0:3], 0 offset:144 7663; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v5, v57 7664; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v30, v57 7665; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v7, v57 7666; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, v57 7667; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v3, v57 7668; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[52:55], off, s[0:3], 0 offset:224 7669; 
GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[34:37], off, s[0:3], 0 offset:208 7670; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v52, 0 7671; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[45:48], off, s[0:3], 0 offset:176 7672; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[27:30], off, s[0:3], 0 offset:112 7673; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:80 7674; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:48 7675; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[8:11], off, s[0:3], 0 offset:16 7676; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v45, 0 7677; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v27, 0 7678; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[49:52], off, s[0:3], 0 offset:192 7679; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[42:45], off, s[0:3], 0 offset:160 7680; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[38:41], off, s[0:3], 0 offset:128 7681; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[56:59], off, s[0:3], 0 offset:240 7682; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[24:27], off, s[0:3], 0 offset:96 7683; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[20:23], off, s[0:3], 0 offset:64 7684; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[12:15], off, s[0:3], 0 offset:32 7685; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 7686; GCN-NOHSA-VI-NEXT: s_endpgm 7687; 7688; EG-LABEL: global_zextload_v32i16_to_v32i64: 7689; EG: ; %bb.0: 7690; EG-NEXT: ALU 0, @30, KC0[CB0:0-32], KC1[] 7691; EG-NEXT: TEX 2 @22 7692; EG-NEXT: ALU 33, @31, KC0[], KC1[] 7693; EG-NEXT: TEX 0 @28 7694; EG-NEXT: ALU 93, @65, KC0[CB0:0-32], KC1[] 7695; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T23.XYZW, T50.X, 0 7696; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T24.XYZW, T49.X, 0 7697; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T25.XYZW, T48.X, 0 7698; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T47.X, 0 7699; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T26.XYZW, T46.X, 0 7700; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T27.XYZW, T45.X, 0 7701; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T28.XYZW, T44.X, 0 7702; 
EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T22.XYZW, T43.X, 0 7703; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T42.X, 0 7704; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T30.XYZW, T41.X, 0 7705; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T31.XYZW, T40.X, 0 7706; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T21.XYZW, T39.X, 0 7707; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T32.XYZW, T38.X, 0 7708; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T33.XYZW, T37.X, 0 7709; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T34.XYZW, T36.X, 0 7710; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T29.XYZW, T35.X, 1 7711; EG-NEXT: CF_END 7712; EG-NEXT: Fetch clause starting at 22: 7713; EG-NEXT: VTX_READ_128 T20.XYZW, T19.X, 48, #1 7714; EG-NEXT: VTX_READ_128 T21.XYZW, T19.X, 16, #1 7715; EG-NEXT: VTX_READ_128 T22.XYZW, T19.X, 32, #1 7716; EG-NEXT: Fetch clause starting at 28: 7717; EG-NEXT: VTX_READ_128 T29.XYZW, T19.X, 0, #1 7718; EG-NEXT: ALU clause starting at 30: 7719; EG-NEXT: MOV * T19.X, KC0[2].Z, 7720; EG-NEXT: ALU clause starting at 31: 7721; EG-NEXT: LSHR * T23.Z, T20.Z, literal.x, 7722; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7723; EG-NEXT: AND_INT T23.X, T20.Z, literal.x, 7724; EG-NEXT: MOV T23.Y, 0.0, 7725; EG-NEXT: LSHR T24.Z, T20.W, literal.y, 7726; EG-NEXT: AND_INT * T24.X, T20.W, literal.x, 7727; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7728; EG-NEXT: MOV T24.Y, 0.0, 7729; EG-NEXT: LSHR * T25.Z, T20.X, literal.x, 7730; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7731; EG-NEXT: AND_INT T25.X, T20.X, literal.x, 7732; EG-NEXT: MOV T25.Y, 0.0, 7733; EG-NEXT: LSHR T20.Z, T20.Y, literal.y, 7734; EG-NEXT: AND_INT * T20.X, T20.Y, literal.x, 7735; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7736; EG-NEXT: MOV T20.Y, 0.0, 7737; EG-NEXT: LSHR * T26.Z, T22.Z, literal.x, 7738; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7739; EG-NEXT: AND_INT T26.X, T22.Z, literal.x, 7740; EG-NEXT: MOV T26.Y, 0.0, 7741; EG-NEXT: LSHR T27.Z, T22.W, literal.y, 7742; EG-NEXT: AND_INT * T27.X, T22.W, literal.x, 7743; EG-NEXT: 
65535(9.183409e-41), 16(2.242078e-44) 7744; EG-NEXT: MOV T27.Y, 0.0, 7745; EG-NEXT: LSHR * T28.Z, T22.X, literal.x, 7746; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7747; EG-NEXT: AND_INT T28.X, T22.X, literal.x, 7748; EG-NEXT: MOV T28.Y, 0.0, 7749; EG-NEXT: LSHR T22.Z, T22.Y, literal.y, 7750; EG-NEXT: AND_INT * T22.X, T22.Y, literal.x, 7751; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7752; EG-NEXT: MOV T22.Y, 0.0, 7753; EG-NEXT: LSHR * T19.Z, T21.Z, literal.x, 7754; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7755; EG-NEXT: ALU clause starting at 65: 7756; EG-NEXT: AND_INT T19.X, T21.Z, literal.x, 7757; EG-NEXT: MOV T19.Y, 0.0, 7758; EG-NEXT: LSHR T30.Z, T21.W, literal.y, 7759; EG-NEXT: AND_INT * T30.X, T21.W, literal.x, 7760; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7761; EG-NEXT: MOV T30.Y, 0.0, 7762; EG-NEXT: LSHR * T31.Z, T21.X, literal.x, 7763; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7764; EG-NEXT: AND_INT T31.X, T21.X, literal.x, 7765; EG-NEXT: MOV T31.Y, 0.0, 7766; EG-NEXT: LSHR T21.Z, T21.Y, literal.y, 7767; EG-NEXT: AND_INT * T21.X, T21.Y, literal.x, 7768; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7769; EG-NEXT: MOV T21.Y, 0.0, 7770; EG-NEXT: LSHR * T32.Z, T29.Z, literal.x, 7771; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7772; EG-NEXT: AND_INT T32.X, T29.Z, literal.x, 7773; EG-NEXT: MOV T32.Y, 0.0, 7774; EG-NEXT: LSHR T33.Z, T29.W, literal.y, 7775; EG-NEXT: AND_INT * T33.X, T29.W, literal.x, 7776; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7777; EG-NEXT: MOV T33.Y, 0.0, 7778; EG-NEXT: LSHR * T34.Z, T29.X, literal.x, 7779; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7780; EG-NEXT: AND_INT T34.X, T29.X, literal.x, 7781; EG-NEXT: MOV T34.Y, 0.0, 7782; EG-NEXT: LSHR T29.Z, T29.Y, literal.y, 7783; EG-NEXT: AND_INT * T29.X, T29.Y, literal.x, 7784; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7785; EG-NEXT: MOV T29.Y, 0.0, 7786; EG-NEXT: MOV T23.W, 0.0, 7787; EG-NEXT: MOV * T24.W, 0.0, 7788; EG-NEXT: MOV T25.W, 0.0, 7789; EG-NEXT: MOV * 
T20.W, 0.0, 7790; EG-NEXT: MOV T26.W, 0.0, 7791; EG-NEXT: MOV * T27.W, 0.0, 7792; EG-NEXT: MOV T28.W, 0.0, 7793; EG-NEXT: MOV * T22.W, 0.0, 7794; EG-NEXT: MOV T19.W, 0.0, 7795; EG-NEXT: MOV * T30.W, 0.0, 7796; EG-NEXT: MOV T31.W, 0.0, 7797; EG-NEXT: MOV * T21.W, 0.0, 7798; EG-NEXT: MOV T32.W, 0.0, 7799; EG-NEXT: MOV * T33.W, 0.0, 7800; EG-NEXT: MOV T34.W, 0.0, 7801; EG-NEXT: MOV * T29.W, 0.0, 7802; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 7803; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7804; EG-NEXT: LSHR T35.X, PV.W, literal.x, 7805; EG-NEXT: LSHR * T36.X, KC0[2].Y, literal.x, 7806; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 7807; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 7808; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 7809; EG-NEXT: LSHR T37.X, PV.W, literal.x, 7810; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7811; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 7812; EG-NEXT: LSHR T38.X, PV.W, literal.x, 7813; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7814; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 7815; EG-NEXT: LSHR T39.X, PV.W, literal.x, 7816; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7817; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 7818; EG-NEXT: LSHR T40.X, PV.W, literal.x, 7819; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7820; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) 7821; EG-NEXT: LSHR T41.X, PV.W, literal.x, 7822; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7823; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 7824; EG-NEXT: LSHR T42.X, PV.W, literal.x, 7825; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7826; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43) 7827; EG-NEXT: LSHR T43.X, PV.W, literal.x, 7828; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7829; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43) 7830; EG-NEXT: LSHR T44.X, PV.W, literal.x, 7831; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7832; EG-NEXT: 2(2.802597e-45), 176(2.466285e-43) 7833; EG-NEXT: LSHR T45.X, PV.W, literal.x, 7834; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 
7835; EG-NEXT: 2(2.802597e-45), 160(2.242078e-43) 7836; EG-NEXT: LSHR T46.X, PV.W, literal.x, 7837; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7838; EG-NEXT: 2(2.802597e-45), 208(2.914701e-43) 7839; EG-NEXT: LSHR T47.X, PV.W, literal.x, 7840; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7841; EG-NEXT: 2(2.802597e-45), 192(2.690493e-43) 7842; EG-NEXT: LSHR T48.X, PV.W, literal.x, 7843; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7844; EG-NEXT: 2(2.802597e-45), 240(3.363116e-43) 7845; EG-NEXT: LSHR T49.X, PV.W, literal.x, 7846; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7847; EG-NEXT: 2(2.802597e-45), 224(3.138909e-43) 7848; EG-NEXT: LSHR * T50.X, PV.W, literal.x, 7849; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 7850; 7851; CM-LABEL: global_zextload_v32i16_to_v32i64: 7852; CM: ; %bb.0: 7853; CM-NEXT: ALU 0, @30, KC0[CB0:0-32], KC1[] 7854; CM-NEXT: TEX 2 @22 7855; CM-NEXT: ALU 33, @31, KC0[], KC1[] 7856; CM-NEXT: TEX 0 @28 7857; CM-NEXT: ALU 94, @65, KC0[CB0:0-32], KC1[] 7858; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T23, T50.X 7859; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T24, T49.X 7860; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T25, T48.X 7861; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T26, T47.X 7862; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T20, T46.X 7863; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T27, T45.X 7864; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T28, T44.X 7865; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T29, T43.X 7866; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T19, T42.X 7867; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T30, T41.X 7868; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T31, T40.X 7869; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T32, T39.X 7870; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T21, T38.X 7871; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T33, T37.X 7872; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T34, T36.X 7873; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T35, T22.X 7874; CM-NEXT: CF_END 7875; CM-NEXT: Fetch clause starting at 22: 7876; CM-NEXT: VTX_READ_128 T20.XYZW, T19.X, 
0, #1 7877; CM-NEXT: VTX_READ_128 T21.XYZW, T19.X, 32, #1 7878; CM-NEXT: VTX_READ_128 T22.XYZW, T19.X, 16, #1 7879; CM-NEXT: Fetch clause starting at 28: 7880; CM-NEXT: VTX_READ_128 T22.XYZW, T19.X, 48, #1 7881; CM-NEXT: ALU clause starting at 30: 7882; CM-NEXT: MOV * T19.X, KC0[2].Z, 7883; CM-NEXT: ALU clause starting at 31: 7884; CM-NEXT: LSHR * T23.Z, T20.Y, literal.x, 7885; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 7886; CM-NEXT: AND_INT T23.X, T20.Y, literal.x, 7887; CM-NEXT: MOV T23.Y, 0.0, 7888; CM-NEXT: LSHR * T24.Z, T20.X, literal.y, 7889; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7890; CM-NEXT: AND_INT T24.X, T20.X, literal.x, 7891; CM-NEXT: MOV T24.Y, 0.0, 7892; CM-NEXT: LSHR * T25.Z, T20.W, literal.y, 7893; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7894; CM-NEXT: AND_INT T25.X, T20.W, literal.x, 7895; CM-NEXT: MOV T25.Y, 0.0, 7896; CM-NEXT: LSHR * T26.Z, T20.Z, literal.y, 7897; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7898; CM-NEXT: AND_INT T26.X, T20.Z, literal.x, 7899; CM-NEXT: MOV T26.Y, 0.0, 7900; CM-NEXT: LSHR * T20.Z, T22.Y, literal.y, 7901; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7902; CM-NEXT: AND_INT T20.X, T22.Y, literal.x, 7903; CM-NEXT: MOV T20.Y, 0.0, 7904; CM-NEXT: LSHR * T27.Z, T22.X, literal.y, 7905; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7906; CM-NEXT: AND_INT T27.X, T22.X, literal.x, 7907; CM-NEXT: MOV T27.Y, 0.0, 7908; CM-NEXT: LSHR * T28.Z, T22.W, literal.y, 7909; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7910; CM-NEXT: AND_INT T28.X, T22.W, literal.x, 7911; CM-NEXT: MOV T28.Y, 0.0, 7912; CM-NEXT: LSHR * T29.Z, T22.Z, literal.y, 7913; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7914; CM-NEXT: AND_INT T29.X, T22.Z, literal.x, 7915; CM-NEXT: MOV T29.Y, 0.0, 7916; CM-NEXT: LSHR * T19.Z, T21.Y, literal.y, 7917; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7918; CM-NEXT: ALU clause starting at 65: 7919; CM-NEXT: AND_INT T19.X, T21.Y, literal.x, 7920; CM-NEXT: MOV T19.Y, 0.0, 7921; CM-NEXT: LSHR * 
T30.Z, T21.X, literal.y, 7922; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7923; CM-NEXT: AND_INT T30.X, T21.X, literal.x, 7924; CM-NEXT: MOV T30.Y, 0.0, 7925; CM-NEXT: LSHR * T31.Z, T21.W, literal.y, 7926; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7927; CM-NEXT: AND_INT T31.X, T21.W, literal.x, 7928; CM-NEXT: MOV T31.Y, 0.0, 7929; CM-NEXT: LSHR * T32.Z, T21.Z, literal.y, 7930; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7931; CM-NEXT: AND_INT T32.X, T21.Z, literal.x, 7932; CM-NEXT: MOV T32.Y, 0.0, 7933; CM-NEXT: LSHR * T21.Z, T22.Y, literal.y, 7934; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7935; CM-NEXT: AND_INT T21.X, T22.Y, literal.x, 7936; CM-NEXT: MOV T21.Y, 0.0, 7937; CM-NEXT: LSHR * T33.Z, T22.X, literal.y, 7938; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7939; CM-NEXT: AND_INT T33.X, T22.X, literal.x, 7940; CM-NEXT: MOV T33.Y, 0.0, 7941; CM-NEXT: LSHR * T34.Z, T22.W, literal.y, 7942; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7943; CM-NEXT: AND_INT T34.X, T22.W, literal.x, 7944; CM-NEXT: MOV T34.Y, 0.0, 7945; CM-NEXT: LSHR * T35.Z, T22.Z, literal.y, 7946; CM-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 7947; CM-NEXT: AND_INT T35.X, T22.Z, literal.x, 7948; CM-NEXT: MOV T35.Y, 0.0, 7949; CM-NEXT: MOV * T23.W, 0.0, 7950; CM-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 7951; CM-NEXT: MOV * T24.W, 0.0, 7952; CM-NEXT: MOV * T25.W, 0.0, 7953; CM-NEXT: MOV * T26.W, 0.0, 7954; CM-NEXT: MOV * T20.W, 0.0, 7955; CM-NEXT: MOV * T27.W, 0.0, 7956; CM-NEXT: MOV * T28.W, 0.0, 7957; CM-NEXT: MOV * T29.W, 0.0, 7958; CM-NEXT: MOV * T19.W, 0.0, 7959; CM-NEXT: MOV * T30.W, 0.0, 7960; CM-NEXT: MOV * T31.W, 0.0, 7961; CM-NEXT: MOV * T32.W, 0.0, 7962; CM-NEXT: MOV * T21.W, 0.0, 7963; CM-NEXT: MOV * T33.W, 0.0, 7964; CM-NEXT: MOV * T34.W, 0.0, 7965; CM-NEXT: MOV * T35.W, 0.0, 7966; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 7967; CM-NEXT: 224(3.138909e-43), 0(0.000000e+00) 7968; CM-NEXT: LSHR T22.X, PV.W, literal.x, 7969; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, 
literal.y, 7970; CM-NEXT: 2(2.802597e-45), 240(3.363116e-43) 7971; CM-NEXT: LSHR T36.X, PV.W, literal.x, 7972; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7973; CM-NEXT: 2(2.802597e-45), 192(2.690493e-43) 7974; CM-NEXT: LSHR T37.X, PV.W, literal.x, 7975; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7976; CM-NEXT: 2(2.802597e-45), 208(2.914701e-43) 7977; CM-NEXT: LSHR T38.X, PV.W, literal.x, 7978; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7979; CM-NEXT: 2(2.802597e-45), 160(2.242078e-43) 7980; CM-NEXT: LSHR T39.X, PV.W, literal.x, 7981; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7982; CM-NEXT: 2(2.802597e-45), 176(2.466285e-43) 7983; CM-NEXT: LSHR T40.X, PV.W, literal.x, 7984; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7985; CM-NEXT: 2(2.802597e-45), 128(1.793662e-43) 7986; CM-NEXT: LSHR T41.X, PV.W, literal.x, 7987; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7988; CM-NEXT: 2(2.802597e-45), 144(2.017870e-43) 7989; CM-NEXT: LSHR T42.X, PV.W, literal.x, 7990; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7991; CM-NEXT: 2(2.802597e-45), 96(1.345247e-43) 7992; CM-NEXT: LSHR T43.X, PV.W, literal.x, 7993; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7994; CM-NEXT: 2(2.802597e-45), 112(1.569454e-43) 7995; CM-NEXT: LSHR T44.X, PV.W, literal.x, 7996; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 7997; CM-NEXT: 2(2.802597e-45), 64(8.968310e-44) 7998; CM-NEXT: LSHR T45.X, PV.W, literal.x, 7999; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8000; CM-NEXT: 2(2.802597e-45), 80(1.121039e-43) 8001; CM-NEXT: LSHR T46.X, PV.W, literal.x, 8002; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8003; CM-NEXT: 2(2.802597e-45), 32(4.484155e-44) 8004; CM-NEXT: LSHR T47.X, PV.W, literal.x, 8005; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8006; CM-NEXT: 2(2.802597e-45), 48(6.726233e-44) 8007; CM-NEXT: LSHR * T48.X, PV.W, literal.x, 8008; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 8009; CM-NEXT: LSHR T49.X, KC0[2].Y, literal.x, 8010; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, 
literal.y, 8011; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 8012; CM-NEXT: LSHR * T50.X, PV.W, literal.x, 8013; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00) 8014 %load = load <32 x i16>, <32 x i16> addrspace(1)* %in 8015 %ext = zext <32 x i16> %load to <32 x i64> 8016 store <32 x i64> %ext, <32 x i64> addrspace(1)* %out 8017 ret void 8018} 8019 8020define amdgpu_kernel void @global_sextload_v32i16_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i16> addrspace(1)* %in) #0 { 8021; GCN-NOHSA-SI-LABEL: global_sextload_v32i16_to_v32i64: 8022; GCN-NOHSA-SI: ; %bb.0: 8023; GCN-NOHSA-SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 8024; GCN-NOHSA-SI-NEXT: s_mov_b32 s3, 0xf000 8025; GCN-NOHSA-SI-NEXT: s_mov_b32 s2, -1 8026; GCN-NOHSA-SI-NEXT: s_mov_b32 s10, s2 8027; GCN-NOHSA-SI-NEXT: s_mov_b32 s11, s3 8028; GCN-NOHSA-SI-NEXT: s_waitcnt lgkmcnt(0) 8029; GCN-NOHSA-SI-NEXT: s_mov_b32 s8, s6 8030; GCN-NOHSA-SI-NEXT: s_mov_b32 s9, s7 8031; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48 8032; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:32 8033; GCN-NOHSA-SI-NEXT: s_mov_b32 s0, s4 8034; GCN-NOHSA-SI-NEXT: s_mov_b32 s1, s5 8035; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 8036; GCN-NOHSA-SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 8037; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(3) 8038; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v16, v15 8039; GCN-NOHSA-SI-NEXT: v_bfe_i32 v16, v16, 0, 16 8040; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[18:19], v[14:15], 48 8041; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v17, 31, v16 8042; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:240 8043; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 8044; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[17:18], v[12:13], 48 8045; GCN-NOHSA-SI-NEXT: v_bfe_i32 v15, v13, 0, 16 8046; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v16, 31, v15 8047; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:208 8048; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(4) 
8049; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v13, v3 8050; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 8051; GCN-NOHSA-SI-NEXT: v_bfe_i32 v15, v13, 0, 16 8052; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[17:18], v[2:3], 48 8053; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v16, 31, v15 8054; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:176 8055; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 8056; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[17:18], v[0:1], 48 8057; GCN-NOHSA-SI-NEXT: v_bfe_i32 v15, v1, 0, 16 8058; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v16, 31, v15 8059; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:144 8060; GCN-NOHSA-SI-NEXT: s_waitcnt vmcnt(4) 8061; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, v7 8062; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 8063; GCN-NOHSA-SI-NEXT: v_bfe_i32 v15, v1, 0, 16 8064; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[17:18], v[6:7], 48 8065; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v16, 31, v15 8066; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:112 8067; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 8068; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[17:18], v[4:5], 48 8069; GCN-NOHSA-SI-NEXT: v_bfe_i32 v15, v5, 0, 16 8070; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v16, 31, v15 8071; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:80 8072; GCN-NOHSA-SI-NEXT: v_mov_b32_e32 v1, v11 8073; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 8074; GCN-NOHSA-SI-NEXT: v_bfe_i32 v15, v1, 0, 16 8075; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[17:18], v[10:11], 48 8076; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v16, 31, v15 8077; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:48 8078; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 8079; GCN-NOHSA-SI-NEXT: v_ashr_i64 v[17:18], v[8:9], 48 8080; GCN-NOHSA-SI-NEXT: v_bfe_i32 v15, v9, 0, 16 8081; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v16, 31, v15 8082; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:16 8083; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v1, 16, v14 8084; 
GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 8085; GCN-NOHSA-SI-NEXT: v_bfe_i32 v15, v1, 0, 16 8086; GCN-NOHSA-SI-NEXT: v_bfe_i32 v13, v14, 0, 16 8087; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v14, 31, v13 8088; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v16, 31, v15 8089; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:224 8090; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v1, 16, v12 8091; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v3, 16, v2 8092; GCN-NOHSA-SI-NEXT: v_bfe_i32 v11, v12, 0, 16 8093; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 8094; GCN-NOHSA-SI-NEXT: v_bfe_i32 v13, v1, 0, 16 8095; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v9, 16, v4 8096; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v12, 31, v11 8097; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v14, 31, v13 8098; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:192 8099; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v7, 16, v10 8100; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 8101; GCN-NOHSA-SI-NEXT: v_bfe_i32 v13, v3, 0, 16 8102; GCN-NOHSA-SI-NEXT: v_bfe_i32 v11, v2, 0, 16 8103; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v12, 31, v11 8104; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v14, 31, v13 8105; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:160 8106; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v1, 16, v8 8107; GCN-NOHSA-SI-NEXT: v_bfe_i32 v3, v1, 0, 16 8108; GCN-NOHSA-SI-NEXT: v_bfe_i32 v1, v8, 0, 16 8109; GCN-NOHSA-SI-NEXT: v_bfe_i32 v5, v10, 0, 16 8110; GCN-NOHSA-SI-NEXT: v_bfe_i32 v7, v7, 0, 16 8111; GCN-NOHSA-SI-NEXT: s_waitcnt expcnt(0) 8112; GCN-NOHSA-SI-NEXT: v_bfe_i32 v11, v9, 0, 16 8113; GCN-NOHSA-SI-NEXT: v_bfe_i32 v9, v4, 0, 16 8114; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v2, 16, v6 8115; GCN-NOHSA-SI-NEXT: v_bfe_i32 v13, v6, 0, 16 8116; GCN-NOHSA-SI-NEXT: v_bfe_i32 v15, v2, 0, 16 8117; GCN-NOHSA-SI-NEXT: v_lshrrev_b32_e32 v2, 16, v0 8118; GCN-NOHSA-SI-NEXT: v_bfe_i32 v17, v0, 0, 16 8119; GCN-NOHSA-SI-NEXT: v_bfe_i32 v19, v2, 0, 16 8120; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v2, 31, v1 8121; 
GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v6, 31, v5 8122; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v10, 31, v9 8123; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v14, 31, v13 8124; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v18, 31, v17 8125; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v4, 31, v3 8126; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v8, 31, v7 8127; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v12, 31, v11 8128; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v16, 31, v15 8129; GCN-NOHSA-SI-NEXT: v_ashrrev_i32_e32 v20, 31, v19 8130; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[17:20], off, s[0:3], 0 offset:128 8131; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:96 8132; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:64 8133; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[5:8], off, s[0:3], 0 offset:32 8134; GCN-NOHSA-SI-NEXT: buffer_store_dwordx4 v[1:4], off, s[0:3], 0 8135; GCN-NOHSA-SI-NEXT: s_endpgm 8136; 8137; GCN-HSA-LABEL: global_sextload_v32i16_to_v32i64: 8138; GCN-HSA: ; %bb.0: 8139; GCN-HSA-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0 8140; GCN-HSA-NEXT: s_waitcnt lgkmcnt(0) 8141; GCN-HSA-NEXT: v_mov_b32_e32 v0, s2 8142; GCN-HSA-NEXT: v_mov_b32_e32 v1, s3 8143; GCN-HSA-NEXT: flat_load_dwordx4 v[8:11], v[0:1] 8144; GCN-HSA-NEXT: s_add_u32 s4, s2, 48 8145; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 8146; GCN-HSA-NEXT: v_mov_b32_e32 v0, s4 8147; GCN-HSA-NEXT: v_mov_b32_e32 v1, s5 8148; GCN-HSA-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 8149; GCN-HSA-NEXT: s_add_u32 s4, s2, 32 8150; GCN-HSA-NEXT: s_addc_u32 s5, s3, 0 8151; GCN-HSA-NEXT: v_mov_b32_e32 v4, s4 8152; GCN-HSA-NEXT: v_mov_b32_e32 v5, s5 8153; GCN-HSA-NEXT: flat_load_dwordx4 v[4:7], v[4:5] 8154; GCN-HSA-NEXT: s_add_u32 s2, s2, 16 8155; GCN-HSA-NEXT: s_addc_u32 s3, s3, 0 8156; GCN-HSA-NEXT: v_mov_b32_e32 v13, s3 8157; GCN-HSA-NEXT: v_mov_b32_e32 v12, s2 8158; GCN-HSA-NEXT: flat_load_dwordx4 v[12:15], v[12:13] 8159; GCN-HSA-NEXT: s_add_u32 s2, s0, 48 8160; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 8161; GCN-HSA-NEXT: 
s_add_u32 s4, s0, 16 8162; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 8163; GCN-HSA-NEXT: v_mov_b32_e32 v21, s5 8164; GCN-HSA-NEXT: v_mov_b32_e32 v20, s4 8165; GCN-HSA-NEXT: s_waitcnt vmcnt(3) 8166; GCN-HSA-NEXT: v_ashr_i64 v[18:19], v[8:9], 48 8167; GCN-HSA-NEXT: v_bfe_i32 v16, v9, 0, 16 8168; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 31, v16 8169; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[16:19] 8170; GCN-HSA-NEXT: v_mov_b32_e32 v21, s3 8171; GCN-HSA-NEXT: v_mov_b32_e32 v20, s2 8172; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xf0 8173; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 8174; GCN-HSA-NEXT: s_add_u32 s4, s0, 0xd0 8175; GCN-HSA-NEXT: s_addc_u32 s5, s1, 0 8176; GCN-HSA-NEXT: s_add_u32 s6, s0, 0xb0 8177; GCN-HSA-NEXT: s_addc_u32 s7, s1, 0 8178; GCN-HSA-NEXT: s_add_u32 s8, s0, 0x90 8179; GCN-HSA-NEXT: s_addc_u32 s9, s1, 0 8180; GCN-HSA-NEXT: s_add_u32 s10, s0, 0x70 8181; GCN-HSA-NEXT: s_addc_u32 s11, s1, 0 8182; GCN-HSA-NEXT: v_mov_b32_e32 v9, v11 8183; GCN-HSA-NEXT: s_add_u32 s12, s0, 0x50 8184; GCN-HSA-NEXT: v_bfe_i32 v16, v9, 0, 16 8185; GCN-HSA-NEXT: v_ashr_i64 v[18:19], v[10:11], 48 8186; GCN-HSA-NEXT: s_addc_u32 s13, s1, 0 8187; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 31, v16 8188; GCN-HSA-NEXT: s_add_u32 s14, s0, 32 8189; GCN-HSA-NEXT: v_lshrrev_b32_e32 v9, 16, v10 8190; GCN-HSA-NEXT: flat_store_dwordx4 v[20:21], v[16:19] 8191; GCN-HSA-NEXT: s_addc_u32 s15, s1, 0 8192; GCN-HSA-NEXT: v_bfe_i32 v18, v9, 0, 16 8193; GCN-HSA-NEXT: v_bfe_i32 v16, v10, 0, 16 8194; GCN-HSA-NEXT: v_mov_b32_e32 v9, s14 8195; GCN-HSA-NEXT: v_ashrrev_i32_e32 v17, 31, v16 8196; GCN-HSA-NEXT: v_ashrrev_i32_e32 v19, 31, v18 8197; GCN-HSA-NEXT: v_mov_b32_e32 v10, s15 8198; GCN-HSA-NEXT: flat_store_dwordx4 v[9:10], v[16:19] 8199; GCN-HSA-NEXT: v_lshrrev_b32_e32 v9, 16, v8 8200; GCN-HSA-NEXT: v_mov_b32_e32 v17, s1 8201; GCN-HSA-NEXT: v_bfe_i32 v8, v8, 0, 16 8202; GCN-HSA-NEXT: v_bfe_i32 v10, v9, 0, 16 8203; GCN-HSA-NEXT: v_mov_b32_e32 v16, s0 8204; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v8 8205; GCN-HSA-NEXT: 
v_ashrrev_i32_e32 v11, 31, v10 8206; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[8:11] 8207; GCN-HSA-NEXT: v_mov_b32_e32 v17, s5 8208; GCN-HSA-NEXT: s_waitcnt vmcnt(6) 8209; GCN-HSA-NEXT: v_ashr_i64 v[10:11], v[0:1], 48 8210; GCN-HSA-NEXT: v_bfe_i32 v8, v1, 0, 16 8211; GCN-HSA-NEXT: v_mov_b32_e32 v16, s4 8212; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v8 8213; GCN-HSA-NEXT: v_mov_b32_e32 v1, v3 8214; GCN-HSA-NEXT: v_mov_b32_e32 v19, s3 8215; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[8:11] 8216; GCN-HSA-NEXT: v_mov_b32_e32 v18, s2 8217; GCN-HSA-NEXT: v_bfe_i32 v8, v1, 0, 16 8218; GCN-HSA-NEXT: v_ashr_i64 v[10:11], v[2:3], 48 8219; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v8 8220; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[8:11] 8221; GCN-HSA-NEXT: v_mov_b32_e32 v19, s9 8222; GCN-HSA-NEXT: s_waitcnt vmcnt(7) 8223; GCN-HSA-NEXT: v_ashr_i64 v[10:11], v[4:5], 48 8224; GCN-HSA-NEXT: v_bfe_i32 v8, v5, 0, 16 8225; GCN-HSA-NEXT: v_mov_b32_e32 v18, s8 8226; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v8 8227; GCN-HSA-NEXT: v_mov_b32_e32 v1, v7 8228; GCN-HSA-NEXT: v_mov_b32_e32 v17, s7 8229; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[8:11] 8230; GCN-HSA-NEXT: v_mov_b32_e32 v16, s6 8231; GCN-HSA-NEXT: v_bfe_i32 v8, v1, 0, 16 8232; GCN-HSA-NEXT: v_ashr_i64 v[10:11], v[6:7], 48 8233; GCN-HSA-NEXT: v_ashrrev_i32_e32 v9, 31, v8 8234; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[8:11] 8235; GCN-HSA-NEXT: v_mov_b32_e32 v17, s13 8236; GCN-HSA-NEXT: s_waitcnt vmcnt(8) 8237; GCN-HSA-NEXT: v_ashr_i64 v[9:10], v[12:13], 48 8238; GCN-HSA-NEXT: v_bfe_i32 v7, v13, 0, 16 8239; GCN-HSA-NEXT: v_mov_b32_e32 v16, s12 8240; GCN-HSA-NEXT: v_ashrrev_i32_e32 v8, 31, v7 8241; GCN-HSA-NEXT: v_mov_b32_e32 v3, v15 8242; GCN-HSA-NEXT: v_mov_b32_e32 v19, s11 8243; GCN-HSA-NEXT: flat_store_dwordx4 v[16:17], v[7:10] 8244; GCN-HSA-NEXT: v_mov_b32_e32 v18, s10 8245; GCN-HSA-NEXT: v_bfe_i32 v7, v3, 0, 16 8246; GCN-HSA-NEXT: v_ashr_i64 v[9:10], v[14:15], 48 8247; GCN-HSA-NEXT: v_lshrrev_b32_e32 v1, 16, v2 
8248; GCN-HSA-NEXT: v_ashrrev_i32_e32 v8, 31, v7 8249; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xe0 8250; GCN-HSA-NEXT: flat_store_dwordx4 v[18:19], v[7:10] 8251; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 8252; GCN-HSA-NEXT: v_bfe_i32 v7, v2, 0, 16 8253; GCN-HSA-NEXT: v_bfe_i32 v9, v1, 0, 16 8254; GCN-HSA-NEXT: v_mov_b32_e32 v1, s2 8255; GCN-HSA-NEXT: v_mov_b32_e32 v2, s3 8256; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xc0 8257; GCN-HSA-NEXT: v_ashrrev_i32_e32 v8, 31, v7 8258; GCN-HSA-NEXT: v_ashrrev_i32_e32 v10, 31, v9 8259; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 8260; GCN-HSA-NEXT: flat_store_dwordx4 v[1:2], v[7:10] 8261; GCN-HSA-NEXT: v_lshrrev_b32_e32 v5, 16, v6 8262; GCN-HSA-NEXT: v_mov_b32_e32 v8, s3 8263; GCN-HSA-NEXT: v_mov_b32_e32 v7, s2 8264; GCN-HSA-NEXT: s_add_u32 s2, s0, 0xa0 8265; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 8266; GCN-HSA-NEXT: v_lshrrev_b32_e32 v13, 16, v4 8267; GCN-HSA-NEXT: v_bfe_i32 v11, v4, 0, 16 8268; GCN-HSA-NEXT: v_bfe_i32 v17, v5, 0, 16 8269; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 8270; GCN-HSA-NEXT: v_bfe_i32 v15, v6, 0, 16 8271; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 8272; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x80 8273; GCN-HSA-NEXT: v_lshrrev_b32_e32 v3, 16, v0 8274; GCN-HSA-NEXT: v_ashrrev_i32_e32 v16, 31, v15 8275; GCN-HSA-NEXT: v_ashrrev_i32_e32 v18, 31, v17 8276; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 8277; GCN-HSA-NEXT: v_bfe_i32 v0, v0, 0, 16 8278; GCN-HSA-NEXT: v_bfe_i32 v2, v3, 0, 16 8279; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[15:18] 8280; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 8281; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 8282; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 31, v2 8283; GCN-HSA-NEXT: v_bfe_i32 v13, v13, 0, 16 8284; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 8285; GCN-HSA-NEXT: s_add_u32 s2, s0, 0x60 8286; GCN-HSA-NEXT: v_lshrrev_b32_e32 v9, 16, v14 8287; GCN-HSA-NEXT: flat_store_dwordx4 v[7:8], v[0:3] 8288; GCN-HSA-NEXT: v_bfe_i32 v7, v14, 0, 16 8289; GCN-HSA-NEXT: v_lshrrev_b32_e32 v2, 16, v12 8290; GCN-HSA-NEXT: v_bfe_i32 v0, v12, 0, 16 8291; GCN-HSA-NEXT: 
v_ashrrev_i32_e32 v12, 31, v11 8292; GCN-HSA-NEXT: v_ashrrev_i32_e32 v14, 31, v13 8293; GCN-HSA-NEXT: s_addc_u32 s3, s1, 0 8294; GCN-HSA-NEXT: v_bfe_i32 v9, v9, 0, 16 8295; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[11:14] 8296; GCN-HSA-NEXT: v_mov_b32_e32 v5, s3 8297; GCN-HSA-NEXT: s_add_u32 s0, s0, 64 8298; GCN-HSA-NEXT: v_ashrrev_i32_e32 v8, 31, v7 8299; GCN-HSA-NEXT: v_ashrrev_i32_e32 v10, 31, v9 8300; GCN-HSA-NEXT: v_mov_b32_e32 v4, s2 8301; GCN-HSA-NEXT: s_addc_u32 s1, s1, 0 8302; GCN-HSA-NEXT: v_bfe_i32 v2, v2, 0, 16 8303; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[7:10] 8304; GCN-HSA-NEXT: v_mov_b32_e32 v5, s1 8305; GCN-HSA-NEXT: v_ashrrev_i32_e32 v1, 31, v0 8306; GCN-HSA-NEXT: v_ashrrev_i32_e32 v3, 31, v2 8307; GCN-HSA-NEXT: v_mov_b32_e32 v4, s0 8308; GCN-HSA-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 8309; GCN-HSA-NEXT: s_endpgm 8310; 8311; GCN-NOHSA-VI-LABEL: global_sextload_v32i16_to_v32i64: 8312; GCN-NOHSA-VI: ; %bb.0: 8313; GCN-NOHSA-VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 8314; GCN-NOHSA-VI-NEXT: s_mov_b32 s3, 0xf000 8315; GCN-NOHSA-VI-NEXT: s_mov_b32 s2, -1 8316; GCN-NOHSA-VI-NEXT: s_mov_b32 s10, s2 8317; GCN-NOHSA-VI-NEXT: s_mov_b32 s11, s3 8318; GCN-NOHSA-VI-NEXT: s_waitcnt lgkmcnt(0) 8319; GCN-NOHSA-VI-NEXT: s_mov_b32 s8, s6 8320; GCN-NOHSA-VI-NEXT: s_mov_b32 s9, s7 8321; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[12:15], off, s[8:11], 0 offset:48 8322; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:32 8323; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 8324; GCN-NOHSA-VI-NEXT: buffer_load_dwordx4 v[8:11], off, s[8:11], 0 8325; GCN-NOHSA-VI-NEXT: s_mov_b32 s0, s4 8326; GCN-NOHSA-VI-NEXT: s_mov_b32 s1, s5 8327; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(3) 8328; GCN-NOHSA-VI-NEXT: v_ashrrev_i64 v[18:19], 48, v[12:13] 8329; GCN-NOHSA-VI-NEXT: v_bfe_i32 v16, v13, 0, 16 8330; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v17, 31, v16 8331; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:208 
8332; GCN-NOHSA-VI-NEXT: v_bfe_i32 v13, v14, 0, 16 8333; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(3) 8334; GCN-NOHSA-VI-NEXT: v_ashrrev_i64 v[18:19], 48, v[0:1] 8335; GCN-NOHSA-VI-NEXT: v_bfe_i32 v16, v1, 0, 16 8336; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v17, 31, v16 8337; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:144 8338; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, v15 8339; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(3) 8340; GCN-NOHSA-VI-NEXT: v_ashrrev_i64 v[18:19], 48, v[4:5] 8341; GCN-NOHSA-VI-NEXT: v_bfe_i32 v16, v5, 0, 16 8342; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v17, 31, v16 8343; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:80 8344; GCN-NOHSA-VI-NEXT: s_waitcnt vmcnt(3) 8345; GCN-NOHSA-VI-NEXT: v_bfe_i32 v5, v10, 0, 16 8346; GCN-NOHSA-VI-NEXT: v_ashrrev_i64 v[18:19], 48, v[8:9] 8347; GCN-NOHSA-VI-NEXT: v_bfe_i32 v16, v9, 0, 16 8348; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v17, 31, v16 8349; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:16 8350; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v9, 16, v4 8351; GCN-NOHSA-VI-NEXT: v_bfe_i32 v16, v1, 0, 16 8352; GCN-NOHSA-VI-NEXT: v_ashrrev_i64 v[18:19], 48, v[14:15] 8353; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v17, 31, v16 8354; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, v3 8355; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[16:19], off, s[0:3], 0 offset:240 8356; GCN-NOHSA-VI-NEXT: v_bfe_i32 v15, v1, 0, 16 8357; GCN-NOHSA-VI-NEXT: v_ashrrev_i64 v[17:18], 48, v[2:3] 8358; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v16, 31, v15 8359; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, v7 8360; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:176 8361; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v3, 16, v2 8362; GCN-NOHSA-VI-NEXT: v_bfe_i32 v15, v1, 0, 16 8363; GCN-NOHSA-VI-NEXT: v_ashrrev_i64 v[17:18], 48, v[6:7] 8364; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v16, 31, v15 8365; GCN-NOHSA-VI-NEXT: v_mov_b32_e32 v1, v11 8366; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[15:18], off, 
s[0:3], 0 offset:112 8367; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v7, 16, v10 8368; GCN-NOHSA-VI-NEXT: v_bfe_i32 v15, v1, 0, 16 8369; GCN-NOHSA-VI-NEXT: v_ashrrev_i64 v[17:18], 48, v[10:11] 8370; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v16, 31, v15 8371; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v14 8372; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[15:18], off, s[0:3], 0 offset:48 8373; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v14, 31, v13 8374; GCN-NOHSA-VI-NEXT: v_bfe_i32 v15, v1, 0, 16 8375; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v16, 31, v15 8376; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v12 8377; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:224 8378; GCN-NOHSA-VI-NEXT: v_bfe_i32 v11, v12, 0, 16 8379; GCN-NOHSA-VI-NEXT: v_bfe_i32 v13, v1, 0, 16 8380; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v12, 31, v11 8381; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v14, 31, v13 8382; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:192 8383; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v1, 16, v8 8384; GCN-NOHSA-VI-NEXT: v_bfe_i32 v11, v2, 0, 16 8385; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v2, 16, v6 8386; GCN-NOHSA-VI-NEXT: v_bfe_i32 v13, v3, 0, 16 8387; GCN-NOHSA-VI-NEXT: v_bfe_i32 v15, v2, 0, 16 8388; GCN-NOHSA-VI-NEXT: v_lshrrev_b32_e32 v2, 16, v0 8389; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v12, 31, v11 8390; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v14, 31, v13 8391; GCN-NOHSA-VI-NEXT: v_bfe_i32 v17, v0, 0, 16 8392; GCN-NOHSA-VI-NEXT: v_bfe_i32 v19, v2, 0, 16 8393; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[11:14], off, s[0:3], 0 offset:160 8394; GCN-NOHSA-VI-NEXT: v_bfe_i32 v3, v1, 0, 16 8395; GCN-NOHSA-VI-NEXT: v_bfe_i32 v1, v8, 0, 16 8396; GCN-NOHSA-VI-NEXT: v_bfe_i32 v7, v7, 0, 16 8397; GCN-NOHSA-VI-NEXT: v_bfe_i32 v11, v9, 0, 16 8398; GCN-NOHSA-VI-NEXT: v_bfe_i32 v9, v4, 0, 16 8399; GCN-NOHSA-VI-NEXT: v_bfe_i32 v13, v6, 0, 16 8400; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v18, 31, v17 8401; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v20, 31, v19 8402; 
GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v2, 31, v1 8403; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v6, 31, v5 8404; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v10, 31, v9 8405; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v14, 31, v13 8406; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v4, 31, v3 8407; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v8, 31, v7 8408; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v12, 31, v11 8409; GCN-NOHSA-VI-NEXT: v_ashrrev_i32_e32 v16, 31, v15 8410; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[17:20], off, s[0:3], 0 offset:128 8411; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[13:16], off, s[0:3], 0 offset:96 8412; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[9:12], off, s[0:3], 0 offset:64 8413; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[5:8], off, s[0:3], 0 offset:32 8414; GCN-NOHSA-VI-NEXT: buffer_store_dwordx4 v[1:4], off, s[0:3], 0 8415; GCN-NOHSA-VI-NEXT: s_endpgm 8416; 8417; EG-LABEL: global_sextload_v32i16_to_v32i64: 8418; EG: ; %bb.0: 8419; EG-NEXT: ALU 0, @30, KC0[CB0:0-32], KC1[] 8420; EG-NEXT: TEX 0 @22 8421; EG-NEXT: ALU 56, @31, KC0[CB0:0-32], KC1[] 8422; EG-NEXT: TEX 2 @24 8423; EG-NEXT: ALU 74, @88, KC0[CB0:0-32], KC1[] 8424; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T50.XYZW, T38.X, 0 8425; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T49.XYZW, T36.X, 0 8426; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T48.XYZW, T34.X, 0 8427; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T39.XYZW, T33.X, 0 8428; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T47.XYZW, T32.X, 0 8429; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T46.XYZW, T31.X, 0 8430; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T45.XYZW, T30.X, 0 8431; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T40.XYZW, T29.X, 0 8432; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T44.XYZW, T28.X, 0 8433; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T43.XYZW, T27.X, 0 8434; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T42.XYZW, T26.X, 0 8435; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T20.XYZW, T25.X, 0 8436; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T41.XYZW, T24.X, 0 8437; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T19.XYZW, T23.X, 0 
8438; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T37.XYZW, T22.X, 0 8439; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T35.XYZW, T21.X, 1 8440; EG-NEXT: CF_END 8441; EG-NEXT: Fetch clause starting at 22: 8442; EG-NEXT: VTX_READ_128 T20.XYZW, T19.X, 0, #1 8443; EG-NEXT: Fetch clause starting at 24: 8444; EG-NEXT: VTX_READ_128 T38.XYZW, T19.X, 48, #1 8445; EG-NEXT: VTX_READ_128 T39.XYZW, T19.X, 32, #1 8446; EG-NEXT: VTX_READ_128 T40.XYZW, T19.X, 16, #1 8447; EG-NEXT: ALU clause starting at 30: 8448; EG-NEXT: MOV * T19.X, KC0[2].Z, 8449; EG-NEXT: ALU clause starting at 31: 8450; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 8451; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 8452; EG-NEXT: LSHR T21.X, PV.W, literal.x, 8453; EG-NEXT: LSHR * T22.X, KC0[2].Y, literal.x, 8454; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 8455; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 8456; EG-NEXT: 48(6.726233e-44), 0(0.000000e+00) 8457; EG-NEXT: LSHR T23.X, PV.W, literal.x, 8458; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8459; EG-NEXT: 2(2.802597e-45), 32(4.484155e-44) 8460; EG-NEXT: LSHR T24.X, PV.W, literal.x, 8461; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8462; EG-NEXT: 2(2.802597e-45), 80(1.121039e-43) 8463; EG-NEXT: LSHR T25.X, PV.W, literal.x, 8464; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8465; EG-NEXT: 2(2.802597e-45), 64(8.968310e-44) 8466; EG-NEXT: LSHR T26.X, PV.W, literal.x, 8467; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8468; EG-NEXT: 2(2.802597e-45), 112(1.569454e-43) 8469; EG-NEXT: LSHR T27.X, PV.W, literal.x, 8470; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8471; EG-NEXT: 2(2.802597e-45), 96(1.345247e-43) 8472; EG-NEXT: LSHR T28.X, PV.W, literal.x, 8473; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8474; EG-NEXT: 2(2.802597e-45), 144(2.017870e-43) 8475; EG-NEXT: LSHR T29.X, PV.W, literal.x, 8476; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8477; EG-NEXT: 2(2.802597e-45), 128(1.793662e-43) 8478; EG-NEXT: LSHR T30.X, PV.W, literal.x, 8479; EG-NEXT: ADD_INT * T0.W, 
KC0[2].Y, literal.y, 8480; EG-NEXT: 2(2.802597e-45), 176(2.466285e-43) 8481; EG-NEXT: LSHR T31.X, PV.W, literal.x, 8482; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8483; EG-NEXT: 2(2.802597e-45), 160(2.242078e-43) 8484; EG-NEXT: LSHR T32.X, PV.W, literal.x, 8485; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8486; EG-NEXT: 2(2.802597e-45), 208(2.914701e-43) 8487; EG-NEXT: LSHR T33.X, PV.W, literal.x, 8488; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8489; EG-NEXT: 2(2.802597e-45), 192(2.690493e-43) 8490; EG-NEXT: LSHR T34.X, PV.W, literal.x, 8491; EG-NEXT: ADD_INT T0.W, KC0[2].Y, literal.y, 8492; EG-NEXT: ASHR * T35.W, T20.Y, literal.z, 8493; EG-NEXT: 2(2.802597e-45), 240(3.363116e-43) 8494; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 8495; EG-NEXT: LSHR T36.X, PV.W, literal.x, 8496; EG-NEXT: ASHR T35.Z, T20.Y, literal.y, 8497; EG-NEXT: ASHR * T37.W, T20.X, literal.z, 8498; EG-NEXT: 2(2.802597e-45), 16(2.242078e-44) 8499; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 8500; EG-NEXT: BFE_INT T35.X, T20.Y, 0.0, literal.x, 8501; EG-NEXT: ASHR * T37.Z, T20.X, literal.x, 8502; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 8503; EG-NEXT: BFE_INT T37.X, T20.X, 0.0, literal.x, 8504; EG-NEXT: ASHR T35.Y, PV.X, literal.y, 8505; EG-NEXT: ASHR * T19.W, T20.W, literal.y, 8506; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8507; EG-NEXT: ALU clause starting at 88: 8508; EG-NEXT: ASHR T19.Z, T20.W, literal.x, 8509; EG-NEXT: ASHR * T41.W, T20.Z, literal.y, 8510; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8511; EG-NEXT: BFE_INT T19.X, T20.W, 0.0, literal.x, 8512; EG-NEXT: ASHR T37.Y, T37.X, literal.y, 8513; EG-NEXT: ASHR T41.Z, T20.Z, literal.x, 8514; EG-NEXT: ASHR * T20.W, T40.Y, literal.y, 8515; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8516; EG-NEXT: BFE_INT T41.X, T20.Z, 0.0, literal.x, 8517; EG-NEXT: ASHR T19.Y, PV.X, literal.y, 8518; EG-NEXT: ASHR T20.Z, T40.Y, literal.x, 8519; EG-NEXT: ASHR * T42.W, T40.X, literal.y, 8520; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8521; 
EG-NEXT: BFE_INT T20.X, T40.Y, 0.0, literal.x, 8522; EG-NEXT: ASHR T41.Y, PV.X, literal.y, 8523; EG-NEXT: ASHR T42.Z, T40.X, literal.x, 8524; EG-NEXT: ASHR * T43.W, T40.W, literal.y, 8525; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8526; EG-NEXT: BFE_INT T42.X, T40.X, 0.0, literal.x, 8527; EG-NEXT: ASHR T20.Y, PV.X, literal.y, 8528; EG-NEXT: ASHR T43.Z, T40.W, literal.x, 8529; EG-NEXT: ASHR * T44.W, T40.Z, literal.y, 8530; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8531; EG-NEXT: BFE_INT T43.X, T40.W, 0.0, literal.x, 8532; EG-NEXT: ASHR T42.Y, PV.X, literal.y, 8533; EG-NEXT: ASHR T44.Z, T40.Z, literal.x, 8534; EG-NEXT: ASHR * T40.W, T39.Y, literal.y, 8535; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8536; EG-NEXT: BFE_INT T44.X, T40.Z, 0.0, literal.x, 8537; EG-NEXT: ASHR T43.Y, PV.X, literal.y, 8538; EG-NEXT: ASHR T40.Z, T39.Y, literal.x, 8539; EG-NEXT: ASHR * T45.W, T39.X, literal.y, 8540; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8541; EG-NEXT: BFE_INT T40.X, T39.Y, 0.0, literal.x, 8542; EG-NEXT: ASHR T44.Y, PV.X, literal.y, 8543; EG-NEXT: ASHR T45.Z, T39.X, literal.x, 8544; EG-NEXT: ASHR * T46.W, T39.W, literal.y, 8545; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8546; EG-NEXT: BFE_INT T45.X, T39.X, 0.0, literal.x, 8547; EG-NEXT: ASHR T40.Y, PV.X, literal.y, 8548; EG-NEXT: ASHR T46.Z, T39.W, literal.x, 8549; EG-NEXT: ASHR * T47.W, T39.Z, literal.y, 8550; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8551; EG-NEXT: BFE_INT T46.X, T39.W, 0.0, literal.x, 8552; EG-NEXT: ASHR T45.Y, PV.X, literal.y, 8553; EG-NEXT: ASHR T47.Z, T39.Z, literal.x, 8554; EG-NEXT: ASHR * T39.W, T38.Y, literal.y, 8555; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8556; EG-NEXT: BFE_INT T47.X, T39.Z, 0.0, literal.x, 8557; EG-NEXT: ASHR T46.Y, PV.X, literal.y, 8558; EG-NEXT: ASHR T39.Z, T38.Y, literal.x, 8559; EG-NEXT: ASHR * T48.W, T38.X, literal.y, 8560; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8561; EG-NEXT: BFE_INT T39.X, T38.Y, 0.0, literal.x, 8562; EG-NEXT: ASHR T47.Y, PV.X, 
literal.y, 8563; EG-NEXT: ASHR T48.Z, T38.X, literal.x, 8564; EG-NEXT: ASHR * T49.W, T38.W, literal.y, 8565; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8566; EG-NEXT: BFE_INT T48.X, T38.X, 0.0, literal.x, 8567; EG-NEXT: ASHR T39.Y, PV.X, literal.y, 8568; EG-NEXT: ASHR T49.Z, T38.W, literal.x, 8569; EG-NEXT: ASHR * T50.W, T38.Z, literal.y, 8570; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8571; EG-NEXT: BFE_INT T49.X, T38.W, 0.0, literal.x, 8572; EG-NEXT: ASHR T48.Y, PV.X, literal.y, 8573; EG-NEXT: ASHR * T50.Z, T38.Z, literal.x, 8574; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8575; EG-NEXT: BFE_INT T50.X, T38.Z, 0.0, literal.x, 8576; EG-NEXT: ASHR T49.Y, PV.X, literal.y, 8577; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.z, 8578; EG-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8579; EG-NEXT: 224(3.138909e-43), 0(0.000000e+00) 8580; EG-NEXT: LSHR T38.X, PV.W, literal.x, 8581; EG-NEXT: ASHR * T50.Y, PV.X, literal.y, 8582; EG-NEXT: 2(2.802597e-45), 31(4.344025e-44) 8583; 8584; CM-LABEL: global_sextload_v32i16_to_v32i64: 8585; CM: ; %bb.0: 8586; CM-NEXT: ALU 0, @30, KC0[CB0:0-32], KC1[] 8587; CM-NEXT: TEX 0 @22 8588; CM-NEXT: ALU 55, @31, KC0[CB0:0-32], KC1[] 8589; CM-NEXT: TEX 2 @24 8590; CM-NEXT: ALU 73, @87, KC0[CB0:0-32], KC1[] 8591; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T38, T50.X 8592; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T49, T36.X 8593; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T48, T34.X 8594; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T47, T33.X 8595; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T39, T32.X 8596; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T46, T31.X 8597; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T45, T30.X 8598; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T44, T29.X 8599; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T40, T28.X 8600; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T43, T27.X 8601; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T42, T26.X 8602; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T41, T25.X 8603; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T20, T24.X 8604; CM-NEXT: 
MEM_RAT_CACHELESS STORE_DWORD T19, T23.X 8605; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T37, T22.X 8606; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T35, T21.X 8607; CM-NEXT: CF_END 8608; CM-NEXT: Fetch clause starting at 22: 8609; CM-NEXT: VTX_READ_128 T20.XYZW, T19.X, 48, #1 8610; CM-NEXT: Fetch clause starting at 24: 8611; CM-NEXT: VTX_READ_128 T38.XYZW, T19.X, 0, #1 8612; CM-NEXT: VTX_READ_128 T39.XYZW, T19.X, 16, #1 8613; CM-NEXT: VTX_READ_128 T40.XYZW, T19.X, 32, #1 8614; CM-NEXT: ALU clause starting at 30: 8615; CM-NEXT: MOV * T19.X, KC0[2].Z, 8616; CM-NEXT: ALU clause starting at 31: 8617; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 8618; CM-NEXT: 224(3.138909e-43), 0(0.000000e+00) 8619; CM-NEXT: LSHR T21.X, PV.W, literal.x, 8620; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8621; CM-NEXT: 2(2.802597e-45), 240(3.363116e-43) 8622; CM-NEXT: LSHR T22.X, PV.W, literal.x, 8623; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8624; CM-NEXT: 2(2.802597e-45), 192(2.690493e-43) 8625; CM-NEXT: LSHR T23.X, PV.W, literal.x, 8626; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8627; CM-NEXT: 2(2.802597e-45), 208(2.914701e-43) 8628; CM-NEXT: LSHR T24.X, PV.W, literal.x, 8629; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8630; CM-NEXT: 2(2.802597e-45), 160(2.242078e-43) 8631; CM-NEXT: LSHR T25.X, PV.W, literal.x, 8632; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8633; CM-NEXT: 2(2.802597e-45), 176(2.466285e-43) 8634; CM-NEXT: LSHR T26.X, PV.W, literal.x, 8635; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8636; CM-NEXT: 2(2.802597e-45), 128(1.793662e-43) 8637; CM-NEXT: LSHR T27.X, PV.W, literal.x, 8638; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8639; CM-NEXT: 2(2.802597e-45), 144(2.017870e-43) 8640; CM-NEXT: LSHR T28.X, PV.W, literal.x, 8641; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8642; CM-NEXT: 2(2.802597e-45), 96(1.345247e-43) 8643; CM-NEXT: LSHR T29.X, PV.W, literal.x, 8644; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8645; CM-NEXT: 2(2.802597e-45), 
112(1.569454e-43) 8646; CM-NEXT: LSHR T30.X, PV.W, literal.x, 8647; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8648; CM-NEXT: 2(2.802597e-45), 64(8.968310e-44) 8649; CM-NEXT: LSHR T31.X, PV.W, literal.x, 8650; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8651; CM-NEXT: 2(2.802597e-45), 80(1.121039e-43) 8652; CM-NEXT: LSHR T32.X, PV.W, literal.x, 8653; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8654; CM-NEXT: 2(2.802597e-45), 32(4.484155e-44) 8655; CM-NEXT: LSHR T33.X, PV.W, literal.x, 8656; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.y, 8657; CM-NEXT: 2(2.802597e-45), 48(6.726233e-44) 8658; CM-NEXT: LSHR T34.X, PV.W, literal.x, 8659; CM-NEXT: ASHR * T35.W, T20.Z, literal.y, 8660; CM-NEXT: 2(2.802597e-45), 31(4.344025e-44) 8661; CM-NEXT: LSHR T36.X, KC0[2].Y, literal.x, 8662; CM-NEXT: ASHR T35.Z, T20.Z, literal.y, 8663; CM-NEXT: ASHR * T37.W, T20.W, literal.z, 8664; CM-NEXT: 2(2.802597e-45), 16(2.242078e-44) 8665; CM-NEXT: 31(4.344025e-44), 0(0.000000e+00) 8666; CM-NEXT: BFE_INT T35.X, T20.Z, 0.0, literal.x, 8667; CM-NEXT: ASHR * T37.Z, T20.W, literal.x, 8668; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00) 8669; CM-NEXT: BFE_INT T37.X, T20.W, 0.0, literal.x, 8670; CM-NEXT: ASHR T35.Y, PV.X, literal.y, 8671; CM-NEXT: ASHR * T19.W, T20.X, literal.y, 8672; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8673; CM-NEXT: ALU clause starting at 87: 8674; CM-NEXT: ASHR T19.Z, T20.X, literal.x, 8675; CM-NEXT: ASHR * T20.W, T20.Y, literal.y, 8676; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8677; CM-NEXT: BFE_INT T19.X, T20.X, 0.0, literal.x, 8678; CM-NEXT: ASHR T37.Y, T37.X, literal.y, BS:VEC_120/SCL_212 8679; CM-NEXT: ASHR T20.Z, T20.Y, literal.x, 8680; CM-NEXT: ASHR * T41.W, T40.Z, literal.y, 8681; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8682; CM-NEXT: BFE_INT T20.X, T20.Y, 0.0, literal.x, 8683; CM-NEXT: ASHR T19.Y, PV.X, literal.y, 8684; CM-NEXT: ASHR T41.Z, T40.Z, literal.x, 8685; CM-NEXT: ASHR * T42.W, T40.W, literal.y, 8686; CM-NEXT: 16(2.242078e-44), 
31(4.344025e-44) 8687; CM-NEXT: BFE_INT T41.X, T40.Z, 0.0, literal.x, 8688; CM-NEXT: ASHR T20.Y, PV.X, literal.y, 8689; CM-NEXT: ASHR T42.Z, T40.W, literal.x, 8690; CM-NEXT: ASHR * T43.W, T40.X, literal.y, 8691; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8692; CM-NEXT: BFE_INT T42.X, T40.W, 0.0, literal.x, 8693; CM-NEXT: ASHR T41.Y, PV.X, literal.y, 8694; CM-NEXT: ASHR T43.Z, T40.X, literal.x, 8695; CM-NEXT: ASHR * T40.W, T40.Y, literal.y, 8696; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8697; CM-NEXT: BFE_INT T43.X, T40.X, 0.0, literal.x, 8698; CM-NEXT: ASHR T42.Y, PV.X, literal.y, 8699; CM-NEXT: ASHR T40.Z, T40.Y, literal.x, 8700; CM-NEXT: ASHR * T44.W, T39.Z, literal.y, 8701; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8702; CM-NEXT: BFE_INT T40.X, T40.Y, 0.0, literal.x, 8703; CM-NEXT: ASHR T43.Y, PV.X, literal.y, 8704; CM-NEXT: ASHR T44.Z, T39.Z, literal.x, 8705; CM-NEXT: ASHR * T45.W, T39.W, literal.y, 8706; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8707; CM-NEXT: BFE_INT T44.X, T39.Z, 0.0, literal.x, 8708; CM-NEXT: ASHR T40.Y, PV.X, literal.y, 8709; CM-NEXT: ASHR T45.Z, T39.W, literal.x, 8710; CM-NEXT: ASHR * T46.W, T39.X, literal.y, 8711; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8712; CM-NEXT: BFE_INT T45.X, T39.W, 0.0, literal.x, 8713; CM-NEXT: ASHR T44.Y, PV.X, literal.y, 8714; CM-NEXT: ASHR T46.Z, T39.X, literal.x, 8715; CM-NEXT: ASHR * T39.W, T39.Y, literal.y, 8716; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8717; CM-NEXT: BFE_INT T46.X, T39.X, 0.0, literal.x, 8718; CM-NEXT: ASHR T45.Y, PV.X, literal.y, 8719; CM-NEXT: ASHR T39.Z, T39.Y, literal.x, 8720; CM-NEXT: ASHR * T47.W, T38.Z, literal.y, 8721; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8722; CM-NEXT: BFE_INT T39.X, T39.Y, 0.0, literal.x, 8723; CM-NEXT: ASHR T46.Y, PV.X, literal.y, 8724; CM-NEXT: ASHR T47.Z, T38.Z, literal.x, 8725; CM-NEXT: ASHR * T48.W, T38.W, literal.y, 8726; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8727; CM-NEXT: BFE_INT T47.X, T38.Z, 0.0, literal.x, 8728; CM-NEXT: 
ASHR T39.Y, PV.X, literal.y, 8729; CM-NEXT: ASHR T48.Z, T38.W, literal.x, 8730; CM-NEXT: ASHR * T49.W, T38.X, literal.y, 8731; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8732; CM-NEXT: BFE_INT T48.X, T38.W, 0.0, literal.x, 8733; CM-NEXT: ASHR T47.Y, PV.X, literal.y, 8734; CM-NEXT: ASHR T49.Z, T38.X, literal.x, 8735; CM-NEXT: ASHR * T38.W, T38.Y, literal.y, 8736; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8737; CM-NEXT: BFE_INT T49.X, T38.X, 0.0, literal.x, 8738; CM-NEXT: ASHR T48.Y, PV.X, literal.y, 8739; CM-NEXT: ASHR * T38.Z, T38.Y, literal.x, 8740; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8741; CM-NEXT: BFE_INT T38.X, T38.Y, 0.0, literal.x, 8742; CM-NEXT: ASHR T49.Y, PV.X, literal.y, 8743; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x, 8744; CM-NEXT: 16(2.242078e-44), 31(4.344025e-44) 8745; CM-NEXT: LSHR T50.X, PV.W, literal.x, 8746; CM-NEXT: ASHR * T38.Y, PV.X, literal.y, 8747; CM-NEXT: 2(2.802597e-45), 31(4.344025e-44) 8748 %load = load <32 x i16>, <32 x i16> addrspace(1)* %in 8749 %ext = sext <32 x i16> %load to <32 x i64> 8750 store <32 x i64> %ext, <32 x i64> addrspace(1)* %out 8751 ret void 8752} 8753 8754; define amdgpu_kernel void @global_zextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 { 8755; %load = load <64 x i16>, <64 x i16> addrspace(1)* %in 8756; %ext = zext <64 x i16> %load to <64 x i64> 8757; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out 8758; ret void 8759; } 8760 8761; define amdgpu_kernel void @global_sextload_v64i16_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i16> addrspace(1)* %in) #0 { 8762; %load = load <64 x i16>, <64 x i16> addrspace(1)* %in 8763; %ext = sext <64 x i16> %load to <64 x i64> 8764; store <64 x i64> %ext, <64 x i64> addrspace(1)* %out 8765; ret void 8766; } 8767 8768attributes #0 = { nounwind } 8769