; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn-- -mcpu=verde -verify-machineinstrs | FileCheck %s --check-prefixes=SI
; RUN: llc < %s -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefixes=VI
; RUN: llc < %s -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600-- -mcpu=redwood -verify-machineinstrs | FileCheck %s --check-prefixes=EG

; Code generation checks for the `shl` instruction on scalar and vector
; integer types, run against the three -mcpu targets selected above.
; The check lines are tool-generated: after changing any IR below, regenerate
; them with utils/update_llc_test_checks.py instead of editing them by hand.

declare i32 @llvm.amdgcn.workitem.id.x() #0

declare i32 @llvm.amdgcn.workgroup.id.x() #0

; <2 x i32> shl: the value comes from element 0 of %in and the shift amounts
; from element 1; the result is stored to %out.
define amdgpu_kernel void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
; SI-LABEL: shl_v2i32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_lshl_b32_e32 v1, v1, v3
; SI-NEXT:    v_lshl_b32_e32 v0, v0, v2
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: shl_v2i32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
; VI-NEXT:    s_load_dwordx2 s[6:7], s[2:3], 0x8
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_lshl_b32 s5, s5, s7
; VI-NEXT:    s_lshl_b32 s4, s4, s6
; VI-NEXT:    v_mov_b32_e32 v0, s4
; VI-NEXT:    v_mov_b32_e32 v1, s5
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
;
; EG-LABEL: shl_v2i32:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 1 @6
; EG-NEXT:    ALU 3, @11, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:    VTX_READ_64 T1.XY, T0.X, 8, #1
; EG-NEXT:    VTX_READ_64 T0.XY, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 10:
; EG-NEXT:    MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 11:
; EG-NEXT:    LSHL * T0.Y, T0.Y, T1.Y,
; EG-NEXT:    LSHL T0.X, T0.X, T1.X,
; EG-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
  %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
  %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
  %result = shl <2 x i32> %a, %b
  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
  ret void
}

; <4 x i32> shl: the value comes from element 0 of %in and the shift amounts
; from element 1; the result is stored to %out.
define amdgpu_kernel void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
; SI-LABEL: shl_v4i32:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
; SI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_lshl_b32_e32 v3, v3, v7
; SI-NEXT:    v_lshl_b32_e32 v2, v2, v6
; SI-NEXT:    v_lshl_b32_e32 v1, v1, v5
; SI-NEXT:    v_lshl_b32_e32 v0, v0, v4
; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: shl_v4i32:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
; VI-NEXT:    s_load_dwordx4 s[8:11], s[2:3], 0x10
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_lshl_b32 s7, s7, s11
; VI-NEXT:    s_lshl_b32 s6, s6, s10
; VI-NEXT:    s_lshl_b32 s5, s5, s9
; VI-NEXT:    s_lshl_b32 s4, s4, s8
; VI-NEXT:    v_mov_b32_e32 v0, s4
; VI-NEXT:    v_mov_b32_e32 v1, s5
; VI-NEXT:    v_mov_b32_e32 v2, s6
; VI-NEXT:    v_mov_b32_e32 v3, s7
; VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; VI-NEXT:    s_endpgm
;
; EG-LABEL: shl_v4i32:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 1 @6
; EG-NEXT:    ALU 5, @11, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:    VTX_READ_128 T1.XYZW, T0.X, 16, #1
; EG-NEXT:    VTX_READ_128 T0.XYZW, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 10:
; EG-NEXT:    MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 11:
; EG-NEXT:    LSHL * T0.W, T0.W, T1.W,
; EG-NEXT:    LSHL * T0.Z, T0.Z, T1.Z,
; EG-NEXT:    LSHL * T0.Y, T0.Y, T1.Y,
; EG-NEXT:    LSHL T0.X, T0.X, T1.X,
; EG-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
  %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
  %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
  %result = shl <4 x i32> %a, %b
  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
  ret void
}

; Scalar i16 shl with both operands loaded from consecutive i16 elements of
; %in (value at index 0, amount at index 1).
define amdgpu_kernel void @shl_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
; SI-LABEL: shl_i16:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
; SI-NEXT:    buffer_load_ushort v1, off, s[8:11], 0 offset:2
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_lshlrev_b32_e32 v0, v1, v0
; SI-NEXT:    buffer_store_short v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: shl_i16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s2
; VI-NEXT:    s_mov_b32 s9, s3
; VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
; VI-NEXT:    buffer_load_ushort v1, off, s[8:11], 0 offset:2
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_lshlrev_b32_e32 v0, v1, v0
; VI-NEXT:    buffer_store_short v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
;
; EG-LABEL: shl_i16:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 1 @6
; EG-NEXT:    ALU 12, @11, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:    VTX_READ_16 T1.X, T0.X, 2, #1
; EG-NEXT:    VTX_READ_16 T0.X, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 10:
; EG-NEXT:    MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 11:
; EG-NEXT:    AND_INT T0.W, KC0[2].Y, literal.x,
; EG-NEXT:    LSHL * T1.W, T0.X, T1.X,
; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT:    AND_INT T1.W, PS, literal.x,
; EG-NEXT:    LSHL * T0.W, PV.W, literal.y,
; EG-NEXT:    65535(9.183409e-41), 3(4.203895e-45)
; EG-NEXT:    LSHL T0.X, PV.W, PS,
; EG-NEXT:    LSHL * T0.W, literal.x, PS,
; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
; EG-NEXT:    MOV T0.Y, 0.0,
; EG-NEXT:    MOV * T0.Z, 0.0,
; EG-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %b_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
  %a = load i16, i16 addrspace(1)* %in
  %b = load i16, i16 addrspace(1)* %b_ptr
  %result = shl i16 %a, %b
  store i16 %result, i16 addrspace(1)* %out
  ret void
}

; i16 shl of a value loaded from %in by the kernel argument %b.
define amdgpu_kernel void @shl_i16_v_s(i16 addrspace(1)* %out, i16 addrspace(1)* %in, i16 %b) {
; SI-LABEL: shl_i16_v_s:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; SI-NEXT:    s_load_dword s12, s[0:1], 0xd
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_mov_b32 s10, s2
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s6
; SI-NEXT:    s_mov_b32 s9, s7
; SI-NEXT:    s_mov_b32 s11, s3
; SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s0, s4
; SI-NEXT:    s_mov_b32 s1, s5
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_lshlrev_b32_e32 v0, s12, v0
; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: shl_i16_v_s:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT:    s_load_dword s12, s[0:1], 0x34
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_mov_b32 s10, s2
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s6
; VI-NEXT:    s_mov_b32 s9, s7
; VI-NEXT:    s_mov_b32 s11, s3
; VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
; VI-NEXT:    s_mov_b32 s0, s4
; VI-NEXT:    s_mov_b32 s1, s5
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_lshlrev_b32_e32 v0, s12, v0
; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
;
; EG-LABEL: shl_i16_v_s:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 1, @10, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 1 @6
; EG-NEXT:    ALU 12, @12, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:    VTX_READ_16 T1.X, T1.X, 0, #1
; EG-NEXT:    VTX_READ_16 T0.X, T0.X, 44, #3
; EG-NEXT:    ALU clause starting at 10:
; EG-NEXT:    MOV T0.X, 0.0,
; EG-NEXT:    MOV * T1.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 12:
; EG-NEXT:    AND_INT T0.W, KC0[2].Y, literal.x,
; EG-NEXT:    LSHL * T1.W, T1.X, T0.X,
; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT:    AND_INT T1.W, PS, literal.x,
; EG-NEXT:    LSHL * T0.W, PV.W, literal.y,
; EG-NEXT:    65535(9.183409e-41), 3(4.203895e-45)
; EG-NEXT:    LSHL T0.X, PV.W, PS,
; EG-NEXT:    LSHL * T0.W, literal.x, PS,
; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
; EG-NEXT:    MOV T0.Y, 0.0,
; EG-NEXT:    MOV * T0.Z, 0.0,
; EG-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %a = load i16, i16 addrspace(1)* %in
  %result = shl i16 %a, %b
  store i16 %result, i16 addrspace(1)* %out
  ret void
}

; i16 shl of a value loaded from %in by (%b + 3), where %b is a kernel
; argument.
define amdgpu_kernel void @shl_i16_v_compute_s(i16 addrspace(1)* %out, i16 addrspace(1)* %in, i16 %b) {
; SI-LABEL: shl_i16_v_compute_s:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; SI-NEXT:    s_load_dword s12, s[0:1], 0xd
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_mov_b32 s10, s2
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s6
; SI-NEXT:    s_mov_b32 s9, s7
; SI-NEXT:    s_mov_b32 s11, s3
; SI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
; SI-NEXT:    s_add_i32 s12, s12, 3
; SI-NEXT:    s_mov_b32 s0, s4
; SI-NEXT:    s_mov_b32 s1, s5
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_lshlrev_b32_e32 v0, s12, v0
; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: shl_i16_v_compute_s:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT:    s_load_dword s12, s[0:1], 0x34
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_mov_b32 s10, s2
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s6
; VI-NEXT:    s_mov_b32 s9, s7
; VI-NEXT:    s_mov_b32 s11, s3
; VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
; VI-NEXT:    s_add_i32 s12, s12, 3
; VI-NEXT:    s_mov_b32 s0, s4
; VI-NEXT:    s_mov_b32 s1, s5
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_lshlrev_b32_e32 v0, s12, v0
; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
;
; EG-LABEL: shl_i16_v_compute_s:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 0, @12, KC0[], KC1[]
; EG-NEXT:    TEX 0 @8
; EG-NEXT:    ALU 0, @13, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @10
; EG-NEXT:    ALU 15, @14, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 8:
; EG-NEXT:    VTX_READ_16 T0.X, T0.X, 44, #3
; EG-NEXT:    Fetch clause starting at 10:
; EG-NEXT:    VTX_READ_16 T1.X, T1.X, 0, #1
; EG-NEXT:    ALU clause starting at 12:
; EG-NEXT:    MOV * T0.X, 0.0,
; EG-NEXT:    ALU clause starting at 13:
; EG-NEXT:    MOV * T1.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 14:
; EG-NEXT:    ADD_INT * T0.W, T0.X, literal.x,
; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT:    AND_INT T0.W, PV.W, literal.x,
; EG-NEXT:    AND_INT * T1.W, KC0[2].Y, literal.y,
; EG-NEXT:    65535(9.183409e-41), 3(4.203895e-45)
; EG-NEXT:    LSHL * T0.W, T1.X, PV.W,
; EG-NEXT:    AND_INT T0.W, PV.W, literal.x,
; EG-NEXT:    LSHL * T1.W, T1.W, literal.y,
; EG-NEXT:    65535(9.183409e-41), 3(4.203895e-45)
; EG-NEXT:    LSHL T0.X, PV.W, PS,
; EG-NEXT:    LSHL * T0.W, literal.x, PS,
; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
; EG-NEXT:    MOV T0.Y, 0.0,
; EG-NEXT:    MOV * T0.Z, 0.0,
; EG-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %a = load i16, i16 addrspace(1)* %in
  %b.add = add i16 %b, 3
  %result = shl i16 %a, %b.add
  store i16 %result, i16 addrspace(1)* %out
  ret void
}

; i16 shl whose amount is a volatile-loaded value plus 3. The amount is read
; from the element after index %tid of %in; the shifted value is read from %in
; itself, and the result is stored to %out.
; NOTE(review): %gep.out is computed but never used. Any change to the IR
; requires regenerating the check lines.
define amdgpu_kernel void @shl_i16_computed_amount(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
; SI-LABEL: shl_i16_computed_amount:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; SI-NEXT:    v_mov_b32_e32 v1, 0
; SI-NEXT:    s_mov_b32 s14, 0
; SI-NEXT:    s_mov_b32 s15, s7
; SI-NEXT:    s_mov_b64 s[12:13], s[2:3]
; SI-NEXT:    buffer_load_ushort v2, off, s[8:11], 0 glc
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    buffer_load_ushort v0, v[0:1], s[12:15], 0 addr64 offset:2 glc
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    v_add_i32_e32 v0, vcc, 3, v0
; SI-NEXT:    v_lshlrev_b32_e32 v0, v0, v2
; SI-NEXT:    buffer_store_short v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: shl_i16_computed_amount:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    v_lshlrev_b32_e32 v0, 1, v0
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_mov_b32 s10, s6
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT:    v_add_u32_e32 v0, vcc, 2, v0
; VI-NEXT:    s_mov_b32 s8, s2
; VI-NEXT:    s_mov_b32 s9, s3
; VI-NEXT:    s_mov_b32 s11, s7
; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT:    buffer_load_ushort v2, off, s[8:11], 0 glc
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    flat_load_ushort v0, v[0:1] glc
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    v_add_u16_e32 v0, 3, v0
; VI-NEXT:    v_lshlrev_b16_e32 v0, v0, v2
; VI-NEXT:    buffer_store_short v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
;
; EG-LABEL: shl_i16_computed_amount:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @8
; EG-NEXT:    ALU 1, @13, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @10
; EG-NEXT:    ALU 15, @15, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 8:
; EG-NEXT:    VTX_READ_16 T1.X, T1.X, 0, #1
; EG-NEXT:    Fetch clause starting at 10:
; EG-NEXT:    VTX_READ_16 T0.X, T0.X, 2, #1
; EG-NEXT:    ALU clause starting at 12:
; EG-NEXT:    MOV * T1.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 13:
; EG-NEXT:    LSHL * T0.W, T0.X, 1,
; EG-NEXT:    ADD_INT * T0.X, KC0[2].Z, PV.W,
; EG-NEXT:    ALU clause starting at 15:
; EG-NEXT:    ADD_INT * T0.W, T0.X, literal.x,
; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT:    AND_INT T0.W, PV.W, literal.x,
; EG-NEXT:    AND_INT * T1.W, KC0[2].Y, literal.y,
; EG-NEXT:    65535(9.183409e-41), 3(4.203895e-45)
; EG-NEXT:    LSHL * T0.W, T1.X, PV.W,
; EG-NEXT:    AND_INT T0.W, PV.W, literal.x,
; EG-NEXT:    LSHL * T1.W, T1.W, literal.y,
; EG-NEXT:    65535(9.183409e-41), 3(4.203895e-45)
; EG-NEXT:    LSHL T0.X, PV.W, PS,
; EG-NEXT:    LSHL * T0.W, literal.x, PS,
; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
; EG-NEXT:    MOV T0.Y, 0.0,
; EG-NEXT:    MOV * T0.Z, 0.0,
; EG-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
  %b_ptr = getelementptr i16, i16 addrspace(1)* %gep, i16 1
  %a = load volatile i16, i16 addrspace(1)* %in
  %b = load volatile i16, i16 addrspace(1)* %b_ptr
  %b.add = add i16 %b, 3
  %result = shl i16 %a, %b.add
  store i16 %result, i16 addrspace(1)* %out
  ret void
}

; i16 shl of the zeroext kernel argument %a by the constant 12.
define amdgpu_kernel void @shl_i16_i_s(i16 addrspace(1)* %out, i16 zeroext %a) {
; SI-LABEL: shl_i16_i_s:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s4, s[0:1], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_lshl_b32 s4, s4, 12
; SI-NEXT:    v_mov_b32_e32 v0, s4
; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: shl_i16_i_s:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s4, s[0:1], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_lshl_b32 s4, s4, 12
; VI-NEXT:    v_mov_b32_e32 v0, s4
; VI-NEXT:    buffer_store_short v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
;
; EG-LABEL: shl_i16_i_s:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 0, @8, KC0[], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 14, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT MSKOR T0.XW, T1.X
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:    VTX_READ_16 T0.X, T0.X, 40, #3
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:    MOV * T0.X, 0.0,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:    BFE_INT T0.W, T0.X, 0.0, literal.x,
; EG-NEXT:    AND_INT * T1.W, KC0[2].Y, literal.y,
; EG-NEXT:    16(2.242078e-44), 3(4.203895e-45)
; EG-NEXT:    LSHL * T0.W, PV.W, literal.x,
; EG-NEXT:    12(1.681558e-44), 0(0.000000e+00)
; EG-NEXT:    AND_INT T0.W, PV.W, literal.x,
; EG-NEXT:    LSHL * T1.W, T1.W, literal.y,
; EG-NEXT:    61440(8.609578e-41), 3(4.203895e-45)
; EG-NEXT:    LSHL T0.X, PV.W, PS,
; EG-NEXT:    LSHL * T0.W, literal.x, PS,
; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
; EG-NEXT:    MOV T0.Y, 0.0,
; EG-NEXT:    MOV * T0.Z, 0.0,
; EG-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %result = shl i16 %a, 12
  store i16 %result, i16 addrspace(1)* %out
  ret void
}

; <2 x i16> shl. The shift amounts are read from the element after index %tid
; of %in; the shifted value is read from %in itself, and the result is stored
; to %out.
; NOTE(review): %gep.out is computed but never used. Any change to the IR
; requires regenerating the check lines.
define amdgpu_kernel void @shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
; SI-LABEL: shl_v2i16:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SI-NEXT:    v_mov_b32_e32 v1, 0
; SI-NEXT:    s_mov_b32 s14, 0
; SI-NEXT:    s_mov_b32 s15, s7
; SI-NEXT:    s_mov_b64 s[12:13], s[2:3]
; SI-NEXT:    buffer_load_dword v2, off, s[8:11], 0
; SI-NEXT:    buffer_load_dword v0, v[0:1], s[12:15], 0 addr64 offset:4
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(1)
; SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v2
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
; SI-NEXT:    v_lshlrev_b32_e32 v0, v0, v2
; SI-NEXT:    v_lshlrev_b32_e32 v1, v3, v1
; SI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; SI-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; SI-NEXT:    v_or_b32_e32 v0, v0, v1
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: shl_v2i16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT:    v_add_u32_e32 v0, vcc, 4, v0
; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT:    flat_load_dword v0, v[0:1]
; VI-NEXT:    s_load_dword s4, s[2:3], 0x0
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_lshr_b32 s5, s4, 16
; VI-NEXT:    v_mov_b32_e32 v1, s5
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_lshlrev_b16_e64 v2, v0, s4
; VI-NEXT:    v_lshlrev_b16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-NEXT:    v_or_b32_e32 v0, v2, v0
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT:    s_endpgm
;
; EG-LABEL: shl_v2i16:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 2, @12, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @8
; EG-NEXT:    ALU 0, @15, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @10
; EG-NEXT:    ALU 12, @16, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T7.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 8:
; EG-NEXT:    VTX_READ_32 T0.X, T0.X, 4, #1
; EG-NEXT:    Fetch clause starting at 10:
; EG-NEXT:    VTX_READ_32 T7.X, T7.X, 0, #1
; EG-NEXT:    ALU clause starting at 12:
; EG-NEXT:    LSHL * T0.W, T0.X, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT:    ADD_INT * T0.X, KC0[2].Z, PV.W,
; EG-NEXT:    ALU clause starting at 15:
; EG-NEXT:    MOV * T7.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 16:
; EG-NEXT:    AND_INT T0.Y, T0.X, literal.x,
; EG-NEXT:    AND_INT T0.Z, T7.X, literal.x, BS:VEC_120/SCL_212
; EG-NEXT:    LSHR T0.W, T0.X, literal.y,
; EG-NEXT:    LSHR * T1.W, T7.X, literal.y,
; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
; EG-NEXT:    LSHL T0.W, PS, PV.W,
; EG-NEXT:    LSHL * T1.W, PV.Z, PV.Y,
; EG-NEXT:    AND_INT T1.W, PS, literal.x,
; EG-NEXT:    LSHL * T0.W, PV.W, literal.y,
; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
; EG-NEXT:    OR_INT T0.X, PV.W, PS,
; EG-NEXT:    LSHR * T7.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
  %b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %gep, i16 1
  %a = load <2 x i16>, <2 x i16> addrspace(1)* %in
  %b = load <2 x i16>, <2 x i16> addrspace(1)* %b_ptr
  %result = shl <2 x i16> %a, %b
  store <2 x i16> %result, <2 x i16> addrspace(1)* %out
  ret void
}

; <4 x i16> shl with per-workitem addressing: the value is read from element
; %tid of %in, the shift amounts from the following element, and the result is
; stored to element %tid of %out.
define amdgpu_kernel void @shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
; SI-LABEL: shl_v4i16:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, 0
; SI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
; SI-NEXT:    v_mov_b32_e32 v1, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b64 s[4:5], s[2:3]
; SI-NEXT:    buffer_load_dwordx2 v[2:3], v[0:1], s[4:7], 0 addr64
; SI-NEXT:    buffer_load_dwordx2 v[4:5], v[0:1], s[4:7], 0 addr64 offset:8
; SI-NEXT:    s_mov_b32 s4, 0xffff
; SI-NEXT:    s_mov_b64 s[2:3], s[6:7]
; SI-NEXT:    s_waitcnt vmcnt(1)
; SI-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
; SI-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_lshrrev_b32_e32 v8, 16, v4
; SI-NEXT:    v_lshrrev_b32_e32 v9, 16, v5
; SI-NEXT:    v_lshlrev_b32_e32 v3, v5, v3
; SI-NEXT:    v_lshlrev_b32_e32 v2, v4, v2
; SI-NEXT:    v_lshlrev_b32_e32 v4, v9, v7
; SI-NEXT:    v_lshlrev_b32_e32 v5, v8, v6
; SI-NEXT:    v_and_b32_e32 v3, s4, v3
; SI-NEXT:    v_and_b32_e32 v2, s4, v2
; SI-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
; SI-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
; SI-NEXT:    v_or_b32_e32 v3, v3, v4
; SI-NEXT:    v_or_b32_e32 v2, v2, v5
; SI-NEXT:    buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
; SI-NEXT:    s_endpgm
;
; VI-LABEL: shl_v4i16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    v_lshlrev_b32_e32 v4, 3, v0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v4
; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT:    v_add_u32_e32 v2, vcc, 8, v0
; VI-NEXT:    v_addc_u32_e32 v3, vcc, 0, v1, vcc
; VI-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
; VI-NEXT:    flat_load_dwordx2 v[2:3], v[2:3]
; VI-NEXT:    v_mov_b32_e32 v5, s1
; VI-NEXT:    v_add_u32_e32 v4, vcc, s0, v4
; VI-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_lshlrev_b16_e32 v6, v3, v1
; VI-NEXT:    v_lshlrev_b16_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NEXT:    v_lshlrev_b16_e32 v3, v2, v0
; VI-NEXT:    v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NEXT:    v_or_b32_e32 v1, v6, v1
; VI-NEXT:    v_or_b32_e32 v0, v3, v0
; VI-NEXT:    flat_store_dwordx2 v[4:5], v[0:1]
; VI-NEXT:    s_endpgm
;
; EG-LABEL: shl_v4i16:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 2, @12, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @8
; EG-NEXT:    ALU 3, @15, KC0[], KC1[]
; EG-NEXT:    TEX 0 @10
; EG-NEXT:    ALU 49, @19, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T10.XY, T0.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 8:
; EG-NEXT:    VTX_READ_64 T10.XY, T0.X, 0, #1
; EG-NEXT:    Fetch clause starting at 10:
; EG-NEXT:    VTX_READ_64 T10.XY, T0.X, 8, #1
; EG-NEXT:    ALU clause starting at 12:
; EG-NEXT:    LSHL * T0.W, T0.X, literal.x,
; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT:    ADD_INT * T0.X, KC0[2].Z, PV.W,
; EG-NEXT:    ALU clause starting at 15:
; EG-NEXT:    MOV T4.X, T10.X,
; EG-NEXT:    MOV * T5.X, T10.Y,
; EG-NEXT:    MOV T0.Y, PV.X,
; EG-NEXT:    MOV * T0.Z, PS,
; EG-NEXT:    ALU clause starting at 19:
; EG-NEXT:    MOV T2.X, T10.X,
; EG-NEXT:    MOV * T3.X, T10.Y,
; EG-NEXT:    MOV T0.X, T6.X,
; EG-NEXT:    MOV * T1.Y, PV.X,
; EG-NEXT:    AND_INT T1.W, PV.Y, literal.x,
; EG-NEXT:    AND_INT * T2.W, T0.Y, literal.x,
; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
; EG-NEXT:    LSHL * T1.W, PS, PV.W,
; EG-NEXT:    AND_INT T1.W, PV.W, literal.x,
; EG-NEXT:    AND_INT * T2.W, T0.X, literal.y,
; EG-NEXT:    65535(9.183409e-41), -65536(nan)
; EG-NEXT:    OR_INT * T1.W, PS, PV.W,
; EG-NEXT:    MOV T0.X, T3.X,
; EG-NEXT:    MOV * T6.X, PV.W,
; EG-NEXT:    MOV T1.Z, PS,
; EG-NEXT:    LSHR T1.W, T1.Y, literal.x,
; EG-NEXT:    LSHR * T2.W, T0.Y, literal.x,
; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT:    LSHL T1.W, PS, PV.W,
; EG-NEXT:    AND_INT * T2.W, PV.Z, literal.x,
; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
; EG-NEXT:    LSHL * T1.W, PV.W, literal.x,
; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT:    OR_INT * T1.W, T2.W, PV.W,
; EG-NEXT:    MOV T6.X, PV.W,
; EG-NEXT:    MOV T0.Y, T7.X,
; EG-NEXT:    AND_INT T1.W, T0.X, literal.x, BS:VEC_120/SCL_212
; EG-NEXT:    AND_INT * T2.W, T0.Z, literal.x,
; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
; EG-NEXT:    LSHL T1.W, PS, PV.W,
; EG-NEXT:    AND_INT * T2.W, PV.Y, literal.x,
; EG-NEXT:    -65536(nan), 0(0.000000e+00)
; EG-NEXT:    AND_INT * T1.W, PV.W, literal.x,
; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
; EG-NEXT:    OR_INT * T1.W, T2.W, PV.W,
; EG-NEXT:    MOV * T7.X, PV.W,
; EG-NEXT:    MOV T0.Y, PV.X,
; EG-NEXT:    LSHR T1.W, T0.X, literal.x,
; EG-NEXT:    LSHR * T2.W, T0.Z, literal.x,
; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT:    LSHL * T1.W, PS, PV.W,
; EG-NEXT:    AND_INT T0.Z, T0.Y, literal.x,
; EG-NEXT:    LSHL T1.W, PV.W, literal.y,
; EG-NEXT:    ADD_INT * T0.W, KC0[2].Y, T0.W,
; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
; EG-NEXT:    LSHR T0.X, PS, literal.x,
; EG-NEXT:    OR_INT * T10.Y, PV.Z, PV.W,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT:    MOV T7.X, PV.Y,
; EG-NEXT:    MOV * T10.X, T6.X,
  %tid = call i32 @llvm.amdgcn.workitem.id.x() #0
  %gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %out, i32 %tid
  %b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %gep, i16 1
  %a = load <4 x i16>, <4 x i16> addrspace(1)* %gep
  %b = load <4 x i16>, <4 x i16> addrspace(1)* %b_ptr
  %result = shl <4 x i16> %a, %b
  store <4 x i16> %result, <4 x i16> addrspace(1)* %gep.out
  ret void
}

; Scalar i64 shl with both operands loaded from consecutive i64 elements of
; %in (value at index 0, amount at index 1).
define amdgpu_kernel void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
; SI-LABEL: shl_i64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_lshl_b64 v[0:1], v[0:1], v2
; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: shl_i64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_lshl_b64 s[4:5], s[4:5], s6
; VI-NEXT:    v_mov_b32_e32 v0, s4
; VI-NEXT:    v_mov_b32_e32 v1, s5
; VI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT:    s_endpgm
;
; EG-LABEL: shl_i64:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 0 @6
; EG-NEXT:    ALU 12, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:    VTX_READ_128 T0.XYZW, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 8:
; EG-NEXT:    MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 9:
; EG-NEXT:    AND_INT T1.Y, T0.Z, literal.x,
; EG-NEXT:    LSHR T1.Z, T0.Y, 1,
; EG-NEXT:    BIT_ALIGN_INT T0.W, T0.Y, T0.X, 1,
; EG-NEXT:    NOT_INT * T1.W, T0.Z,
; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT:    BIT_ALIGN_INT T1.Z, PV.Z, PV.W, PS,
; EG-NEXT:    LSHL T0.W, T0.X, PV.Y,
; EG-NEXT:    AND_INT * T1.W, T0.Z, literal.x,
; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT:    CNDE_INT * T0.Y, PS, PV.Z, PV.W,
; EG-NEXT:    CNDE_INT T0.X, T1.W, T0.W, 0.0,
; EG-NEXT:    LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
  %a = load i64, i64 addrspace(1)* %in
  %b = load i64, i64 addrspace(1)* %b_ptr
  %result = shl i64 %a, %b
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; <2 x i64> shl: the value comes from element 0 of %in and the shift amounts
; from element 1; the result is stored to %out.
define amdgpu_kernel void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
; SI-LABEL: shl_v2i64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s10, s6
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s2
; SI-NEXT:    s_mov_b32 s9, s3
; SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0
; SI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_lshl_b64 v[2:3], v[2:3], v6
; SI-NEXT:    v_lshl_b64 v[0:1], v[0:1], v4
; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: shl_v2i64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
; VI-NEXT:    s_load_dwordx4 s[8:11], s[2:3], 0x10
; VI-NEXT:    s_mov_b32 s3, 0xf000
; VI-NEXT:    s_mov_b32 s2, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_lshl_b64 s[6:7], s[6:7], s10
; VI-NEXT:    s_lshl_b64 s[4:5], s[4:5], s8
; VI-NEXT:    v_mov_b32_e32 v0, s4
; VI-NEXT:    v_mov_b32_e32 v1, s5
; VI-NEXT:    v_mov_b32_e32 v2, s6
; VI-NEXT:    v_mov_b32_e32 v3, s7
; VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; VI-NEXT:    s_endpgm
;
; EG-LABEL: shl_v2i64:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 1 @6
; EG-NEXT:    ALU 22, @11, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T3.XYZW, T0.X, 1
; EG-NEXT:    CF_END
; EG-NEXT:    PAD
; EG-NEXT:    Fetch clause starting at 6:
; EG-NEXT:    VTX_READ_128 T1.XYZW, T0.X, 16, #1
; EG-NEXT:    VTX_READ_128 T0.XYZW, T0.X, 0, #1
; EG-NEXT:    ALU clause starting at 10:
; EG-NEXT:    MOV * T0.X, KC0[2].Z,
; EG-NEXT:    ALU clause starting at 11:
; EG-NEXT:    AND_INT T1.Y, T1.Z, literal.x,
; EG-NEXT:    LSHR T2.Z, T0.W, 1,
; EG-NEXT:    BIT_ALIGN_INT T0.W, T0.W, T0.Z, 1,
; EG-NEXT:    NOT_INT * T1.W, T1.Z,
; EG-NEXT:    31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT:    BIT_ALIGN_INT T0.W, PV.Z, PV.W, PS,
; EG-NEXT:    LSHL * T1.W, T0.Z, PV.Y,
; EG-NEXT:    AND_INT T2.X, T1.Z, literal.x,
; EG-NEXT:    AND_INT T1.Y, T1.X, literal.y,
; EG-NEXT:    LSHR T0.Z, T0.Y, 1,
; EG-NEXT:    BIT_ALIGN_INT T2.W, T0.Y, T0.X, 1,
; EG-NEXT:    NOT_INT * T3.W, T1.X,
; EG-NEXT:    32(4.484155e-44), 31(4.344025e-44)
; EG-NEXT:    BIT_ALIGN_INT T0.Y, PV.Z, PV.W, PS,
; EG-NEXT:    LSHL T0.Z, T0.X, PV.Y,
; EG-NEXT:    AND_INT T2.W, T1.X, literal.x, BS:VEC_120/SCL_212
; EG-NEXT:    CNDE_INT * T3.W, PV.X, T0.W, T1.W,
; EG-NEXT:    32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT:    CNDE_INT T3.Y, PV.W, PV.Y, PV.Z,
; EG-NEXT:    CNDE_INT * T3.Z, T2.X, T1.W, 0.0,
; EG-NEXT:    CNDE_INT T3.X, T2.W, T0.Z, 0.0,
; EG-NEXT:    LSHR * T0.X, KC0[2].Y, literal.x,
; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1
  %a = load <2 x i64>, <2 x i64> addrspace(1)* %in
  %b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr
  %result = shl <2 x i64> %a, %b
  store <2 x i64> %result, <2 x i64> addrspace(1)* %out
  ret void
}

define amdgpu_kernel void @shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
; SI-LABEL: shl_v4i64:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_mov_b32 s10, s2
; SI-NEXT:    s_mov_b32 s11, s3
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s6
; SI-NEXT:    s_mov_b32 s9, s7
; SI-NEXT:    buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:16
; SI-NEXT:    buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:48
; SI-NEXT:    buffer_load_dwordx4 v[7:10], off, s[8:11], 0
; SI-NEXT:    buffer_load_dwordx4 v[11:14], off, s[8:11], 0 offset:32
; SI-NEXT:    s_mov_b32 s0, s4
; SI-NEXT:    s_mov_b32 s1, s5
; SI-NEXT:    s_waitcnt vmcnt(2)
; SI-NEXT:    v_lshl_b64 v[2:3], v[2:3], v6
; SI-NEXT:    v_lshl_b64 v[0:1], v[0:1], v4
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_lshl_b64 v[9:10], v[9:10], v13
; SI-NEXT:    v_lshl_b64 v[7:8], v[7:8], v11
; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16
; SI-NEXT:    buffer_store_dwordx4 v[7:10], off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: shl_v4i64:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[16:19], s[0:1], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_load_dwordx8 s[0:7], s[18:19], 0x0
; VI-NEXT:    s_load_dwordx8 s[8:15], s[18:19], 0x20
; VI-NEXT:    s_mov_b32 s19, 0xf000
; VI-NEXT:    s_mov_b32 s18, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_lshl_b64 s[6:7], s[6:7], s14
; VI-NEXT:    s_lshl_b64 s[4:5], s[4:5], s12
; VI-NEXT:    s_lshl_b64 s[2:3], s[2:3], s10
; VI-NEXT:    s_lshl_b64 s[0:1], s[0:1], s8
; VI-NEXT:    v_mov_b32_e32 v0, s4
; VI-NEXT:    v_mov_b32_e32 v1, s5
; VI-NEXT:    v_mov_b32_e32 v2, s6
; VI-NEXT:    v_mov_b32_e32 v3, s7
; VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:16
; VI-NEXT:    s_nop 0
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    v_mov_b32_e32 v2, s2
; VI-NEXT:    v_mov_b32_e32 v3, s3
; VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[16:19], 0
; VI-NEXT:    s_endpgm
;
; EG-LABEL: shl_v4i64:
; EG:       ; %bb.0:
; EG-NEXT:    ALU 0, @14, KC0[CB0:0-32], KC1[]
; EG-NEXT:    TEX 3 @6
; EG-NEXT:    ALU 47, @15, KC0[CB0:0-32], KC1[]
; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0
987; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T0.X, 1 988; EG-NEXT: CF_END 989; EG-NEXT: Fetch clause starting at 6: 990; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 48, #1 991; EG-NEXT: VTX_READ_128 T2.XYZW, T0.X, 0, #1 992; EG-NEXT: VTX_READ_128 T3.XYZW, T0.X, 32, #1 993; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 16, #1 994; EG-NEXT: ALU clause starting at 14: 995; EG-NEXT: MOV * T0.X, KC0[2].Z, 996; EG-NEXT: ALU clause starting at 15: 997; EG-NEXT: AND_INT T4.Z, T1.Z, literal.x, 998; EG-NEXT: LSHR T1.W, T0.W, 1, 999; EG-NEXT: NOT_INT * T3.W, T1.Z, 1000; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 1001; EG-NEXT: BIT_ALIGN_INT T4.X, T0.W, T0.Z, 1, 1002; EG-NEXT: AND_INT T1.Y, T3.Z, literal.x, BS:VEC_201 1003; EG-NEXT: LSHR T5.Z, T2.W, 1, BS:VEC_120/SCL_212 1004; EG-NEXT: BIT_ALIGN_INT T0.W, T2.W, T2.Z, 1, BS:VEC_102/SCL_221 1005; EG-NEXT: NOT_INT * T2.W, T3.Z, 1006; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 1007; EG-NEXT: BIT_ALIGN_INT T3.Y, PV.Z, PV.W, PS, 1008; EG-NEXT: LSHL T2.Z, T2.Z, PV.Y, 1009; EG-NEXT: BIT_ALIGN_INT T0.W, T1.W, PV.X, T3.W, 1010; EG-NEXT: LSHL * T1.W, T0.Z, T4.Z, 1011; EG-NEXT: AND_INT T4.X, T1.Z, literal.x, 1012; EG-NEXT: AND_INT T1.Y, T1.X, literal.y, 1013; EG-NEXT: LSHR T0.Z, T0.Y, 1, 1014; EG-NEXT: BIT_ALIGN_INT T2.W, T0.Y, T0.X, 1, 1015; EG-NEXT: NOT_INT * T3.W, T1.X, 1016; EG-NEXT: 32(4.484155e-44), 31(4.344025e-44) 1017; EG-NEXT: AND_INT T5.X, T3.Z, literal.x, 1018; EG-NEXT: BIT_ALIGN_INT T0.Y, PV.Z, PV.W, PS, 1019; EG-NEXT: LSHL T0.Z, T0.X, PV.Y, 1020; EG-NEXT: AND_INT T2.W, T1.X, literal.x, BS:VEC_120/SCL_212 1021; EG-NEXT: CNDE_INT * T4.W, PV.X, T0.W, T1.W, 1022; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) 1023; EG-NEXT: AND_INT T0.X, T3.X, literal.x, 1024; EG-NEXT: CNDE_INT T4.Y, PV.W, PV.Y, PV.Z, 1025; EG-NEXT: LSHR T1.Z, T2.Y, 1, 1026; EG-NEXT: BIT_ALIGN_INT T0.W, T2.Y, T2.X, 1, 1027; EG-NEXT: NOT_INT * T3.W, T3.X, 1028; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 1029; EG-NEXT: BIT_ALIGN_INT T1.X, PV.Z, PV.W, PS, 1030; EG-NEXT: LSHL 
T0.Y, T2.X, PV.X, 1031; EG-NEXT: CNDE_INT T4.Z, T4.X, T1.W, 0.0, BS:VEC_120/SCL_212 1032; EG-NEXT: AND_INT * T0.W, T3.X, literal.x, BS:VEC_201 1033; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) 1034; EG-NEXT: CNDE_INT * T1.W, T5.X, T3.Y, T2.Z, 1035; EG-NEXT: CNDE_INT T4.X, T2.W, T0.Z, 0.0, 1036; EG-NEXT: CNDE_INT T1.Y, T0.W, T1.X, T0.Y, BS:VEC_120/SCL_212 1037; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.x, 1038; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1039; EG-NEXT: LSHR T0.X, PV.W, literal.x, 1040; EG-NEXT: CNDE_INT T1.Z, T5.X, T2.Z, 0.0, 1041; EG-NEXT: CNDE_INT * T1.X, T0.W, T0.Y, 0.0, 1042; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1043; EG-NEXT: LSHR * T2.X, KC0[2].Y, literal.x, 1044; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1045 %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1 1046 %a = load <4 x i64>, <4 x i64> addrspace(1)* %in 1047 %b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr 1048 %result = shl <4 x i64> %a, %b 1049 store <4 x i64> %result, <4 x i64> addrspace(1)* %out 1050 ret void 1051} 1052 1053; Make sure load width gets reduced to i32 load. 
1054define amdgpu_kernel void @s_shl_32_i64(i64 addrspace(1)* %out, [8 x i32], i64 %a) { 1055; SI-LABEL: s_shl_32_i64: 1056; SI: ; %bb.0: 1057; SI-NEXT: s_load_dword s4, s[0:1], 0x13 1058; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1059; SI-NEXT: s_mov_b32 s3, 0xf000 1060; SI-NEXT: s_mov_b32 s2, -1 1061; SI-NEXT: v_mov_b32_e32 v0, 0 1062; SI-NEXT: s_waitcnt lgkmcnt(0) 1063; SI-NEXT: v_mov_b32_e32 v1, s4 1064; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1065; SI-NEXT: s_endpgm 1066; 1067; VI-LABEL: s_shl_32_i64: 1068; VI: ; %bb.0: 1069; VI-NEXT: s_load_dword s4, s[0:1], 0x4c 1070; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1071; VI-NEXT: s_mov_b32 s3, 0xf000 1072; VI-NEXT: s_mov_b32 s2, -1 1073; VI-NEXT: v_mov_b32_e32 v0, 0 1074; VI-NEXT: s_waitcnt lgkmcnt(0) 1075; VI-NEXT: v_mov_b32_e32 v1, s4 1076; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1077; VI-NEXT: s_endpgm 1078; 1079; EG-LABEL: s_shl_32_i64: 1080; EG: ; %bb.0: 1081; EG-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[] 1082; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1083; EG-NEXT: CF_END 1084; EG-NEXT: PAD 1085; EG-NEXT: ALU clause starting at 4: 1086; EG-NEXT: MOV * T0.Y, KC0[4].W, 1087; EG-NEXT: MOV T0.X, 0.0, 1088; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1089; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1090 %result = shl i64 %a, 32 1091 store i64 %result, i64 addrspace(1)* %out 1092 ret void 1093} 1094 1095define amdgpu_kernel void @v_shl_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { 1096; SI-LABEL: v_shl_32_i64: 1097; SI: ; %bb.0: 1098; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 1099; SI-NEXT: s_ashr_i32 s3, s2, 31 1100; SI-NEXT: s_lshl_b64 s[0:1], s[2:3], 3 1101; SI-NEXT: v_mov_b32_e32 v0, s0 1102; SI-NEXT: s_mov_b32 s11, 0xf000 1103; SI-NEXT: s_mov_b32 s10, 0 1104; SI-NEXT: s_waitcnt lgkmcnt(0) 1105; SI-NEXT: s_mov_b64 s[8:9], s[6:7] 1106; SI-NEXT: v_mov_b32_e32 v1, s1 1107; SI-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 1108; SI-NEXT: s_mov_b64 s[6:7], 
s[10:11] 1109; SI-NEXT: v_mov_b32_e32 v2, 0 1110; SI-NEXT: s_waitcnt vmcnt(0) 1111; SI-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[4:7], 0 addr64 1112; SI-NEXT: s_endpgm 1113; 1114; VI-LABEL: v_shl_32_i64: 1115; VI: ; %bb.0: 1116; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 1117; VI-NEXT: s_ashr_i32 s3, s2, 31 1118; VI-NEXT: s_lshl_b64 s[0:1], s[2:3], 3 1119; VI-NEXT: v_mov_b32_e32 v0, 0 1120; VI-NEXT: s_waitcnt lgkmcnt(0) 1121; VI-NEXT: s_add_u32 s2, s6, s0 1122; VI-NEXT: s_addc_u32 s3, s7, s1 1123; VI-NEXT: s_load_dword s2, s[2:3], 0x0 1124; VI-NEXT: s_add_u32 s0, s4, s0 1125; VI-NEXT: s_addc_u32 s1, s5, s1 1126; VI-NEXT: v_mov_b32_e32 v3, s1 1127; VI-NEXT: v_mov_b32_e32 v2, s0 1128; VI-NEXT: s_waitcnt lgkmcnt(0) 1129; VI-NEXT: v_mov_b32_e32 v1, s2 1130; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 1131; VI-NEXT: s_endpgm 1132; 1133; EG-LABEL: v_shl_32_i64: 1134; EG: ; %bb.0: 1135; EG-NEXT: ALU 2, @8, KC0[CB0:0-32], KC1[] 1136; EG-NEXT: TEX 0 @6 1137; EG-NEXT: ALU 4, @11, KC0[CB0:0-32], KC1[] 1138; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XY, T2.X, 1 1139; EG-NEXT: CF_END 1140; EG-NEXT: PAD 1141; EG-NEXT: Fetch clause starting at 6: 1142; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1 1143; EG-NEXT: ALU clause starting at 8: 1144; EG-NEXT: LSHL * T0.W, T1.X, literal.x, 1145; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) 1146; EG-NEXT: ADD_INT * T0.X, KC0[2].Z, PV.W, 1147; EG-NEXT: ALU clause starting at 11: 1148; EG-NEXT: MOV T1.X, 0.0, 1149; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, T0.W, 1150; EG-NEXT: LSHR T2.X, PV.W, literal.x, 1151; EG-NEXT: MOV * T1.Y, T0.X, 1152; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1153 %tid = call i32 @llvm.amdgcn.workgroup.id.x() #0 1154 %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid 1155 %gep.out = getelementptr i64, i64 addrspace(1)* %out, i32 %tid 1156 %a = load i64, i64 addrspace(1)* %gep.in 1157 %result = shl i64 %a, 32 1158 store i64 %result, i64 addrspace(1)* %gep.out 1159 ret void 1160} 1161 1162define amdgpu_kernel void 
@s_shl_constant_i64(i64 addrspace(1)* %out, i64 %a) { 1163; SI-LABEL: s_shl_constant_i64: 1164; SI: ; %bb.0: 1165; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1166; SI-NEXT: s_mov_b32 s6, -1 1167; SI-NEXT: s_mov_b32 s9, 0xffff 1168; SI-NEXT: s_mov_b32 s8, s6 1169; SI-NEXT: s_mov_b32 s7, 0xf000 1170; SI-NEXT: s_waitcnt lgkmcnt(0) 1171; SI-NEXT: s_mov_b32 s4, s0 1172; SI-NEXT: s_mov_b32 s5, s1 1173; SI-NEXT: s_lshl_b64 s[0:1], s[8:9], s2 1174; SI-NEXT: v_mov_b32_e32 v0, s0 1175; SI-NEXT: v_mov_b32_e32 v1, s1 1176; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1177; SI-NEXT: s_endpgm 1178; 1179; VI-LABEL: s_shl_constant_i64: 1180; VI: ; %bb.0: 1181; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 1182; VI-NEXT: s_mov_b32 s6, -1 1183; VI-NEXT: s_mov_b32 s9, 0xffff 1184; VI-NEXT: s_mov_b32 s8, s6 1185; VI-NEXT: s_mov_b32 s7, 0xf000 1186; VI-NEXT: s_waitcnt lgkmcnt(0) 1187; VI-NEXT: s_mov_b32 s4, s0 1188; VI-NEXT: s_mov_b32 s5, s1 1189; VI-NEXT: s_lshl_b64 s[0:1], s[8:9], s2 1190; VI-NEXT: v_mov_b32_e32 v0, s0 1191; VI-NEXT: v_mov_b32_e32 v1, s1 1192; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1193; VI-NEXT: s_endpgm 1194; 1195; EG-LABEL: s_shl_constant_i64: 1196; EG: ; %bb.0: 1197; EG-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[] 1198; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1199; EG-NEXT: CF_END 1200; EG-NEXT: PAD 1201; EG-NEXT: ALU clause starting at 4: 1202; EG-NEXT: AND_INT T0.Z, KC0[2].W, literal.x, 1203; EG-NEXT: MOV T0.W, literal.y, 1204; EG-NEXT: NOT_INT * T1.W, KC0[2].W, 1205; EG-NEXT: 31(4.344025e-44), -1(nan) 1206; EG-NEXT: BIT_ALIGN_INT T1.Z, literal.x, PV.W, PS, 1207; EG-NEXT: LSHL T0.W, literal.y, PV.Z, 1208; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.z, 1209; EG-NEXT: 32767(4.591635e-41), -1(nan) 1210; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) 1211; EG-NEXT: CNDE_INT * T0.Y, PS, PV.Z, PV.W, 1212; EG-NEXT: CNDE_INT T0.X, T1.W, T0.W, 0.0, 1213; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1214; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 
1215 %shl = shl i64 281474976710655, %a 1216 store i64 %shl, i64 addrspace(1)* %out, align 8 1217 ret void 1218} 1219 1220define amdgpu_kernel void @v_shl_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) { 1221; SI-LABEL: v_shl_constant_i64: 1222; SI: ; %bb.0: 1223; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1224; SI-NEXT: s_mov_b32 s7, 0xf000 1225; SI-NEXT: s_mov_b32 s6, -1 1226; SI-NEXT: s_mov_b32 s10, s6 1227; SI-NEXT: s_mov_b32 s11, s7 1228; SI-NEXT: s_waitcnt lgkmcnt(0) 1229; SI-NEXT: s_mov_b32 s8, s2 1230; SI-NEXT: s_mov_b32 s9, s3 1231; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0 1232; SI-NEXT: s_mov_b32 s2, 0xab19b207 1233; SI-NEXT: s_movk_i32 s3, 0x11e 1234; SI-NEXT: s_mov_b32 s4, s0 1235; SI-NEXT: s_mov_b32 s5, s1 1236; SI-NEXT: s_waitcnt vmcnt(0) 1237; SI-NEXT: v_lshl_b64 v[0:1], s[2:3], v0 1238; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1239; SI-NEXT: s_endpgm 1240; 1241; VI-LABEL: v_shl_constant_i64: 1242; VI: ; %bb.0: 1243; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 1244; VI-NEXT: s_mov_b32 s7, 0xf000 1245; VI-NEXT: s_mov_b32 s6, -1 1246; VI-NEXT: s_waitcnt lgkmcnt(0) 1247; VI-NEXT: s_load_dword s2, s[2:3], 0x0 1248; VI-NEXT: s_mov_b32 s4, s0 1249; VI-NEXT: s_mov_b32 s5, s1 1250; VI-NEXT: s_mov_b32 s0, 0xab19b207 1251; VI-NEXT: s_movk_i32 s1, 0x11e 1252; VI-NEXT: s_waitcnt lgkmcnt(0) 1253; VI-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 1254; VI-NEXT: v_mov_b32_e32 v0, s0 1255; VI-NEXT: v_mov_b32_e32 v1, s1 1256; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1257; VI-NEXT: s_endpgm 1258; 1259; EG-LABEL: v_shl_constant_i64: 1260; EG: ; %bb.0: 1261; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1262; EG-NEXT: TEX 0 @6 1263; EG-NEXT: ALU 12, @9, KC0[CB0:0-32], KC1[] 1264; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1265; EG-NEXT: CF_END 1266; EG-NEXT: PAD 1267; EG-NEXT: Fetch clause starting at 6: 1268; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1 1269; EG-NEXT: ALU clause starting at 8: 1270; EG-NEXT: MOV * T0.X, KC0[2].Z, 1271; 
EG-NEXT: ALU clause starting at 9: 1272; EG-NEXT: NOT_INT T0.Z, T0.X, 1273; EG-NEXT: MOV T0.W, literal.x, 1274; EG-NEXT: AND_INT * T1.W, T0.X, literal.y, 1275; EG-NEXT: 1435293955(1.935796e+13), 31(4.344025e-44) 1276; EG-NEXT: LSHL T1.Z, literal.x, PS, 1277; EG-NEXT: BIT_ALIGN_INT T0.W, literal.y, PV.W, PV.Z, 1278; EG-NEXT: AND_INT * T1.W, T0.X, literal.z, 1279; EG-NEXT: -1424379385(-5.460358e-13), 143(2.003857e-43) 1280; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) 1281; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, PV.Z, 1282; EG-NEXT: CNDE_INT T0.X, T1.W, T1.Z, 0.0, 1283; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1284; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1285 %a = load i64, i64 addrspace(1)* %aptr, align 8 1286 %shl = shl i64 1231231234567, %a 1287 store i64 %shl, i64 addrspace(1)* %out, align 8 1288 ret void 1289} 1290 1291define amdgpu_kernel void @v_shl_i64_32_bit_constant(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) { 1292; SI-LABEL: v_shl_i64_32_bit_constant: 1293; SI: ; %bb.0: 1294; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1295; SI-NEXT: s_mov_b32 s7, 0xf000 1296; SI-NEXT: s_mov_b32 s6, -1 1297; SI-NEXT: s_mov_b32 s10, s6 1298; SI-NEXT: s_mov_b32 s11, s7 1299; SI-NEXT: s_waitcnt lgkmcnt(0) 1300; SI-NEXT: s_mov_b32 s8, s2 1301; SI-NEXT: s_mov_b32 s9, s3 1302; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0 1303; SI-NEXT: s_mov_b64 s[2:3], 0x12d687 1304; SI-NEXT: s_mov_b32 s4, s0 1305; SI-NEXT: s_mov_b32 s5, s1 1306; SI-NEXT: s_waitcnt vmcnt(0) 1307; SI-NEXT: v_lshl_b64 v[0:1], s[2:3], v0 1308; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1309; SI-NEXT: s_endpgm 1310; 1311; VI-LABEL: v_shl_i64_32_bit_constant: 1312; VI: ; %bb.0: 1313; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 1314; VI-NEXT: s_mov_b32 s7, 0xf000 1315; VI-NEXT: s_mov_b32 s6, -1 1316; VI-NEXT: s_waitcnt lgkmcnt(0) 1317; VI-NEXT: s_load_dword s2, s[2:3], 0x0 1318; VI-NEXT: s_mov_b32 s4, s0 1319; VI-NEXT: s_mov_b32 s5, s1 1320; VI-NEXT: s_mov_b64 s[0:1], 0x12d687 1321; VI-NEXT: 
s_waitcnt lgkmcnt(0) 1322; VI-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 1323; VI-NEXT: v_mov_b32_e32 v0, s0 1324; VI-NEXT: v_mov_b32_e32 v1, s1 1325; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1326; VI-NEXT: s_endpgm 1327; 1328; EG-LABEL: v_shl_i64_32_bit_constant: 1329; EG: ; %bb.0: 1330; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1331; EG-NEXT: TEX 0 @6 1332; EG-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[] 1333; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1334; EG-NEXT: CF_END 1335; EG-NEXT: PAD 1336; EG-NEXT: Fetch clause starting at 6: 1337; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1 1338; EG-NEXT: ALU clause starting at 8: 1339; EG-NEXT: MOV * T0.X, KC0[2].Z, 1340; EG-NEXT: ALU clause starting at 9: 1341; EG-NEXT: AND_INT T0.W, T0.X, literal.x, 1342; EG-NEXT: NOT_INT * T1.W, T0.X, 1343; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 1344; EG-NEXT: BIT_ALIGN_INT T0.Z, 0.0, literal.x, PS, 1345; EG-NEXT: LSHL T0.W, literal.y, PV.W, 1346; EG-NEXT: AND_INT * T1.W, T0.X, literal.z, 1347; EG-NEXT: 617283(8.649977e-40), 1234567(1.729997e-39) 1348; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) 1349; EG-NEXT: CNDE_INT * T0.Y, PS, PV.Z, PV.W, 1350; EG-NEXT: CNDE_INT T0.X, T1.W, T0.W, 0.0, 1351; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1352; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1353 %a = load i64, i64 addrspace(1)* %aptr, align 8 1354 %shl = shl i64 1234567, %a 1355 store i64 %shl, i64 addrspace(1)* %out, align 8 1356 ret void 1357} 1358 1359define amdgpu_kernel void @v_shl_inline_imm_64_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) { 1360; SI-LABEL: v_shl_inline_imm_64_i64: 1361; SI: ; %bb.0: 1362; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1363; SI-NEXT: s_mov_b32 s7, 0xf000 1364; SI-NEXT: s_mov_b32 s6, -1 1365; SI-NEXT: s_mov_b32 s10, s6 1366; SI-NEXT: s_mov_b32 s11, s7 1367; SI-NEXT: s_waitcnt lgkmcnt(0) 1368; SI-NEXT: s_mov_b32 s8, s2 1369; SI-NEXT: s_mov_b32 s9, s3 1370; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0 1371; SI-NEXT: s_mov_b32 s4, s0 1372; 
SI-NEXT: s_mov_b32 s5, s1 1373; SI-NEXT: s_waitcnt vmcnt(0) 1374; SI-NEXT: v_lshl_b64 v[0:1], 64, v0 1375; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1376; SI-NEXT: s_endpgm 1377; 1378; VI-LABEL: v_shl_inline_imm_64_i64: 1379; VI: ; %bb.0: 1380; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 1381; VI-NEXT: s_waitcnt lgkmcnt(0) 1382; VI-NEXT: s_load_dword s4, s[2:3], 0x0 1383; VI-NEXT: s_mov_b32 s3, 0xf000 1384; VI-NEXT: s_mov_b32 s2, -1 1385; VI-NEXT: s_waitcnt lgkmcnt(0) 1386; VI-NEXT: s_lshl_b64 s[4:5], 64, s4 1387; VI-NEXT: v_mov_b32_e32 v0, s4 1388; VI-NEXT: v_mov_b32_e32 v1, s5 1389; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1390; VI-NEXT: s_endpgm 1391; 1392; EG-LABEL: v_shl_inline_imm_64_i64: 1393; EG: ; %bb.0: 1394; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1395; EG-NEXT: TEX 0 @6 1396; EG-NEXT: ALU 10, @9, KC0[CB0:0-32], KC1[] 1397; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1398; EG-NEXT: CF_END 1399; EG-NEXT: PAD 1400; EG-NEXT: Fetch clause starting at 6: 1401; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1 1402; EG-NEXT: ALU clause starting at 8: 1403; EG-NEXT: MOV * T0.X, KC0[2].Z, 1404; EG-NEXT: ALU clause starting at 9: 1405; EG-NEXT: AND_INT T0.W, T0.X, literal.x, 1406; EG-NEXT: NOT_INT * T1.W, T0.X, 1407; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 1408; EG-NEXT: BIT_ALIGN_INT T0.Z, 0.0, literal.x, PS, 1409; EG-NEXT: LSHL T0.W, literal.y, PV.W, 1410; EG-NEXT: AND_INT * T1.W, T0.X, literal.x, 1411; EG-NEXT: 32(4.484155e-44), 64(8.968310e-44) 1412; EG-NEXT: CNDE_INT * T0.Y, PS, PV.Z, PV.W, 1413; EG-NEXT: CNDE_INT T0.X, T1.W, T0.W, 0.0, 1414; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1415; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1416 %a = load i64, i64 addrspace(1)* %aptr, align 8 1417 %shl = shl i64 64, %a 1418 store i64 %shl, i64 addrspace(1)* %out, align 8 1419 ret void 1420} 1421 1422define amdgpu_kernel void @s_shl_inline_imm_64_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { 1423; SI-LABEL: 
s_shl_inline_imm_64_i64: 1424; SI: ; %bb.0: 1425; SI-NEXT: s_load_dword s4, s[0:1], 0xd 1426; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1427; SI-NEXT: s_mov_b32 s3, 0xf000 1428; SI-NEXT: s_mov_b32 s2, -1 1429; SI-NEXT: s_waitcnt lgkmcnt(0) 1430; SI-NEXT: s_lshl_b64 s[4:5], 64, s4 1431; SI-NEXT: v_mov_b32_e32 v0, s4 1432; SI-NEXT: v_mov_b32_e32 v1, s5 1433; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1434; SI-NEXT: s_endpgm 1435; 1436; VI-LABEL: s_shl_inline_imm_64_i64: 1437; VI: ; %bb.0: 1438; VI-NEXT: s_load_dword s4, s[0:1], 0x34 1439; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1440; VI-NEXT: s_mov_b32 s3, 0xf000 1441; VI-NEXT: s_mov_b32 s2, -1 1442; VI-NEXT: s_waitcnt lgkmcnt(0) 1443; VI-NEXT: s_lshl_b64 s[4:5], 64, s4 1444; VI-NEXT: v_mov_b32_e32 v0, s4 1445; VI-NEXT: v_mov_b32_e32 v1, s5 1446; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1447; VI-NEXT: s_endpgm 1448; 1449; EG-LABEL: s_shl_inline_imm_64_i64: 1450; EG: ; %bb.0: 1451; EG-NEXT: ALU 10, @4, KC0[CB0:0-32], KC1[] 1452; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1453; EG-NEXT: CF_END 1454; EG-NEXT: PAD 1455; EG-NEXT: ALU clause starting at 4: 1456; EG-NEXT: NOT_INT T0.W, KC0[2].W, 1457; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.x, 1458; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 1459; EG-NEXT: LSHL T0.Z, literal.x, PS, 1460; EG-NEXT: BIT_ALIGN_INT T0.W, 0.0, literal.y, PV.W, 1461; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y, 1462; EG-NEXT: 64(8.968310e-44), 32(4.484155e-44) 1463; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, PV.Z, 1464; EG-NEXT: CNDE_INT T0.X, T1.W, T0.Z, 0.0, 1465; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1466; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1467 %shl = shl i64 64, %a 1468 store i64 %shl, i64 addrspace(1)* %out, align 8 1469 ret void 1470} 1471 1472define amdgpu_kernel void @s_shl_inline_imm_1_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { 1473; SI-LABEL: s_shl_inline_imm_1_i64: 1474; SI: ; %bb.0: 1475; SI-NEXT: s_load_dword s4, 
s[0:1], 0xd 1476; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1477; SI-NEXT: s_mov_b32 s3, 0xf000 1478; SI-NEXT: s_mov_b32 s2, -1 1479; SI-NEXT: s_waitcnt lgkmcnt(0) 1480; SI-NEXT: s_lshl_b64 s[4:5], 1, s4 1481; SI-NEXT: v_mov_b32_e32 v0, s4 1482; SI-NEXT: v_mov_b32_e32 v1, s5 1483; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1484; SI-NEXT: s_endpgm 1485; 1486; VI-LABEL: s_shl_inline_imm_1_i64: 1487; VI: ; %bb.0: 1488; VI-NEXT: s_load_dword s4, s[0:1], 0x34 1489; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1490; VI-NEXT: s_mov_b32 s3, 0xf000 1491; VI-NEXT: s_mov_b32 s2, -1 1492; VI-NEXT: s_waitcnt lgkmcnt(0) 1493; VI-NEXT: s_lshl_b64 s[4:5], 1, s4 1494; VI-NEXT: v_mov_b32_e32 v0, s4 1495; VI-NEXT: v_mov_b32_e32 v1, s5 1496; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1497; VI-NEXT: s_endpgm 1498; 1499; EG-LABEL: s_shl_inline_imm_1_i64: 1500; EG: ; %bb.0: 1501; EG-NEXT: ALU 11, @4, KC0[CB0:0-32], KC1[] 1502; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1503; EG-NEXT: CF_END 1504; EG-NEXT: PAD 1505; EG-NEXT: ALU clause starting at 4: 1506; EG-NEXT: AND_INT T0.W, KC0[2].W, literal.x, 1507; EG-NEXT: LSHL * T1.W, KC0[2].W, literal.y, 1508; EG-NEXT: 31(4.344025e-44), 26(3.643376e-44) 1509; EG-NEXT: ASHR T1.W, PS, literal.x, 1510; EG-NEXT: LSHL * T0.W, 1, PV.W, 1511; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 1512; EG-NEXT: AND_INT T0.Y, PV.W, PS, 1513; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.x, 1514; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) 1515; EG-NEXT: CNDE_INT T0.X, PV.W, T0.W, 0.0, 1516; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1517; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1518 %shl = shl i64 1, %a 1519 store i64 %shl, i64 addrspace(1)* %out, align 8 1520 ret void 1521} 1522 1523define amdgpu_kernel void @s_shl_inline_imm_1_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { 1524; SI-LABEL: s_shl_inline_imm_1_0_i64: 1525; SI: ; %bb.0: 1526; SI-NEXT: s_load_dword s4, s[0:1], 0xd 1527; SI-NEXT: s_load_dwordx2 s[0:1], 
s[0:1], 0x9 1528; SI-NEXT: s_mov_b32 s3, 0xf000 1529; SI-NEXT: s_mov_b32 s2, -1 1530; SI-NEXT: s_waitcnt lgkmcnt(0) 1531; SI-NEXT: s_lshl_b64 s[4:5], 1.0, s4 1532; SI-NEXT: v_mov_b32_e32 v0, s4 1533; SI-NEXT: v_mov_b32_e32 v1, s5 1534; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1535; SI-NEXT: s_endpgm 1536; 1537; VI-LABEL: s_shl_inline_imm_1_0_i64: 1538; VI: ; %bb.0: 1539; VI-NEXT: s_load_dword s4, s[0:1], 0x34 1540; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1541; VI-NEXT: s_mov_b32 s3, 0xf000 1542; VI-NEXT: s_mov_b32 s2, -1 1543; VI-NEXT: s_waitcnt lgkmcnt(0) 1544; VI-NEXT: s_lshl_b64 s[4:5], 1.0, s4 1545; VI-NEXT: v_mov_b32_e32 v0, s4 1546; VI-NEXT: v_mov_b32_e32 v1, s5 1547; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1548; VI-NEXT: s_endpgm 1549; 1550; EG-LABEL: s_shl_inline_imm_1_0_i64: 1551; EG: ; %bb.0: 1552; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] 1553; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1554; EG-NEXT: CF_END 1555; EG-NEXT: PAD 1556; EG-NEXT: ALU clause starting at 4: 1557; EG-NEXT: NOT_INT * T0.W, KC0[2].W, 1558; EG-NEXT: BIT_ALIGN_INT T0.W, literal.x, 0.0, PV.W, 1559; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y, 1560; EG-NEXT: 536346624(1.050321e-19), 32(4.484155e-44) 1561; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, 0.0, 1562; EG-NEXT: MOV T0.X, 0.0, 1563; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1564; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1565 %shl = shl i64 4607182418800017408, %a 1566 store i64 %shl, i64 addrspace(1)* %out, align 8 1567 ret void 1568} 1569 1570define amdgpu_kernel void @s_shl_inline_imm_neg_1_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { 1571; SI-LABEL: s_shl_inline_imm_neg_1_0_i64: 1572; SI: ; %bb.0: 1573; SI-NEXT: s_load_dword s4, s[0:1], 0xd 1574; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1575; SI-NEXT: s_mov_b32 s3, 0xf000 1576; SI-NEXT: s_mov_b32 s2, -1 1577; SI-NEXT: s_waitcnt lgkmcnt(0) 1578; SI-NEXT: s_lshl_b64 s[4:5], -1.0, s4 1579; SI-NEXT: v_mov_b32_e32 v0, s4 
1580; SI-NEXT: v_mov_b32_e32 v1, s5 1581; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1582; SI-NEXT: s_endpgm 1583; 1584; VI-LABEL: s_shl_inline_imm_neg_1_0_i64: 1585; VI: ; %bb.0: 1586; VI-NEXT: s_load_dword s4, s[0:1], 0x34 1587; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1588; VI-NEXT: s_mov_b32 s3, 0xf000 1589; VI-NEXT: s_mov_b32 s2, -1 1590; VI-NEXT: s_waitcnt lgkmcnt(0) 1591; VI-NEXT: s_lshl_b64 s[4:5], -1.0, s4 1592; VI-NEXT: v_mov_b32_e32 v0, s4 1593; VI-NEXT: v_mov_b32_e32 v1, s5 1594; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1595; VI-NEXT: s_endpgm 1596; 1597; EG-LABEL: s_shl_inline_imm_neg_1_0_i64: 1598; EG: ; %bb.0: 1599; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] 1600; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1601; EG-NEXT: CF_END 1602; EG-NEXT: PAD 1603; EG-NEXT: ALU clause starting at 4: 1604; EG-NEXT: NOT_INT * T0.W, KC0[2].W, 1605; EG-NEXT: BIT_ALIGN_INT T0.W, literal.x, 0.0, PV.W, 1606; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y, 1607; EG-NEXT: 1610088448(3.574057e+19), 32(4.484155e-44) 1608; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, 0.0, 1609; EG-NEXT: MOV T0.X, 0.0, 1610; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1611; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1612 %shl = shl i64 13830554455654793216, %a 1613 store i64 %shl, i64 addrspace(1)* %out, align 8 1614 ret void 1615} 1616 1617define amdgpu_kernel void @s_shl_inline_imm_0_5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { 1618; SI-LABEL: s_shl_inline_imm_0_5_i64: 1619; SI: ; %bb.0: 1620; SI-NEXT: s_load_dword s4, s[0:1], 0xd 1621; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1622; SI-NEXT: s_mov_b32 s3, 0xf000 1623; SI-NEXT: s_mov_b32 s2, -1 1624; SI-NEXT: s_waitcnt lgkmcnt(0) 1625; SI-NEXT: s_lshl_b64 s[4:5], 0.5, s4 1626; SI-NEXT: v_mov_b32_e32 v0, s4 1627; SI-NEXT: v_mov_b32_e32 v1, s5 1628; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1629; SI-NEXT: s_endpgm 1630; 1631; VI-LABEL: s_shl_inline_imm_0_5_i64: 1632; VI: ; %bb.0: 1633; 
VI-NEXT: s_load_dword s4, s[0:1], 0x34 1634; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1635; VI-NEXT: s_mov_b32 s3, 0xf000 1636; VI-NEXT: s_mov_b32 s2, -1 1637; VI-NEXT: s_waitcnt lgkmcnt(0) 1638; VI-NEXT: s_lshl_b64 s[4:5], 0.5, s4 1639; VI-NEXT: v_mov_b32_e32 v0, s4 1640; VI-NEXT: v_mov_b32_e32 v1, s5 1641; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1642; VI-NEXT: s_endpgm 1643; 1644; EG-LABEL: s_shl_inline_imm_0_5_i64: 1645; EG: ; %bb.0: 1646; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] 1647; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1648; EG-NEXT: CF_END 1649; EG-NEXT: PAD 1650; EG-NEXT: ALU clause starting at 4: 1651; EG-NEXT: NOT_INT * T0.W, KC0[2].W, 1652; EG-NEXT: BIT_ALIGN_INT T0.W, literal.x, 0.0, PV.W, 1653; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y, 1654; EG-NEXT: 535822336(1.016440e-19), 32(4.484155e-44) 1655; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, 0.0, 1656; EG-NEXT: MOV T0.X, 0.0, 1657; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1658; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1659 %shl = shl i64 4602678819172646912, %a 1660 store i64 %shl, i64 addrspace(1)* %out, align 8 1661 ret void 1662} 1663 1664define amdgpu_kernel void @s_shl_inline_imm_neg_0_5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { 1665; SI-LABEL: s_shl_inline_imm_neg_0_5_i64: 1666; SI: ; %bb.0: 1667; SI-NEXT: s_load_dword s4, s[0:1], 0xd 1668; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1669; SI-NEXT: s_mov_b32 s3, 0xf000 1670; SI-NEXT: s_mov_b32 s2, -1 1671; SI-NEXT: s_waitcnt lgkmcnt(0) 1672; SI-NEXT: s_lshl_b64 s[4:5], -0.5, s4 1673; SI-NEXT: v_mov_b32_e32 v0, s4 1674; SI-NEXT: v_mov_b32_e32 v1, s5 1675; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1676; SI-NEXT: s_endpgm 1677; 1678; VI-LABEL: s_shl_inline_imm_neg_0_5_i64: 1679; VI: ; %bb.0: 1680; VI-NEXT: s_load_dword s4, s[0:1], 0x34 1681; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1682; VI-NEXT: s_mov_b32 s3, 0xf000 1683; VI-NEXT: s_mov_b32 s2, -1 1684; VI-NEXT: s_waitcnt lgkmcnt(0) 
1685; VI-NEXT: s_lshl_b64 s[4:5], -0.5, s4 1686; VI-NEXT: v_mov_b32_e32 v0, s4 1687; VI-NEXT: v_mov_b32_e32 v1, s5 1688; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1689; VI-NEXT: s_endpgm 1690; 1691; EG-LABEL: s_shl_inline_imm_neg_0_5_i64: 1692; EG: ; %bb.0: 1693; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] 1694; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1695; EG-NEXT: CF_END 1696; EG-NEXT: PAD 1697; EG-NEXT: ALU clause starting at 4: 1698; EG-NEXT: NOT_INT * T0.W, KC0[2].W, 1699; EG-NEXT: BIT_ALIGN_INT T0.W, literal.x, 0.0, PV.W, 1700; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y, 1701; EG-NEXT: 1609564160(3.458765e+19), 32(4.484155e-44) 1702; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, 0.0, 1703; EG-NEXT: MOV T0.X, 0.0, 1704; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1705; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1706 %shl = shl i64 13826050856027422720, %a 1707 store i64 %shl, i64 addrspace(1)* %out, align 8 1708 ret void 1709} 1710 1711define amdgpu_kernel void @s_shl_inline_imm_2_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { 1712; SI-LABEL: s_shl_inline_imm_2_0_i64: 1713; SI: ; %bb.0: 1714; SI-NEXT: s_load_dword s4, s[0:1], 0xd 1715; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1716; SI-NEXT: s_mov_b32 s3, 0xf000 1717; SI-NEXT: s_mov_b32 s2, -1 1718; SI-NEXT: s_waitcnt lgkmcnt(0) 1719; SI-NEXT: s_lshl_b64 s[4:5], 2.0, s4 1720; SI-NEXT: v_mov_b32_e32 v0, s4 1721; SI-NEXT: v_mov_b32_e32 v1, s5 1722; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1723; SI-NEXT: s_endpgm 1724; 1725; VI-LABEL: s_shl_inline_imm_2_0_i64: 1726; VI: ; %bb.0: 1727; VI-NEXT: s_load_dword s4, s[0:1], 0x34 1728; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1729; VI-NEXT: s_mov_b32 s3, 0xf000 1730; VI-NEXT: s_mov_b32 s2, -1 1731; VI-NEXT: s_waitcnt lgkmcnt(0) 1732; VI-NEXT: s_lshl_b64 s[4:5], 2.0, s4 1733; VI-NEXT: v_mov_b32_e32 v0, s4 1734; VI-NEXT: v_mov_b32_e32 v1, s5 1735; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1736; VI-NEXT: s_endpgm 
1737; 1738; EG-LABEL: s_shl_inline_imm_2_0_i64: 1739; EG: ; %bb.0: 1740; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] 1741; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1742; EG-NEXT: CF_END 1743; EG-NEXT: PAD 1744; EG-NEXT: ALU clause starting at 4: 1745; EG-NEXT: NOT_INT * T0.W, KC0[2].W, 1746; EG-NEXT: BIT_ALIGN_INT T0.W, literal.x, 0.0, PV.W, 1747; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y, 1748; EG-NEXT: 536870912(1.084202e-19), 32(4.484155e-44) 1749; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, 0.0, 1750; EG-NEXT: MOV T0.X, 0.0, 1751; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1752; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1753 %shl = shl i64 4611686018427387904, %a 1754 store i64 %shl, i64 addrspace(1)* %out, align 8 1755 ret void 1756} 1757 1758define amdgpu_kernel void @s_shl_inline_imm_neg_2_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { 1759; SI-LABEL: s_shl_inline_imm_neg_2_0_i64: 1760; SI: ; %bb.0: 1761; SI-NEXT: s_load_dword s4, s[0:1], 0xd 1762; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1763; SI-NEXT: s_mov_b32 s3, 0xf000 1764; SI-NEXT: s_mov_b32 s2, -1 1765; SI-NEXT: s_waitcnt lgkmcnt(0) 1766; SI-NEXT: s_lshl_b64 s[4:5], -2.0, s4 1767; SI-NEXT: v_mov_b32_e32 v0, s4 1768; SI-NEXT: v_mov_b32_e32 v1, s5 1769; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1770; SI-NEXT: s_endpgm 1771; 1772; VI-LABEL: s_shl_inline_imm_neg_2_0_i64: 1773; VI: ; %bb.0: 1774; VI-NEXT: s_load_dword s4, s[0:1], 0x34 1775; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1776; VI-NEXT: s_mov_b32 s3, 0xf000 1777; VI-NEXT: s_mov_b32 s2, -1 1778; VI-NEXT: s_waitcnt lgkmcnt(0) 1779; VI-NEXT: s_lshl_b64 s[4:5], -2.0, s4 1780; VI-NEXT: v_mov_b32_e32 v0, s4 1781; VI-NEXT: v_mov_b32_e32 v1, s5 1782; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1783; VI-NEXT: s_endpgm 1784; 1785; EG-LABEL: s_shl_inline_imm_neg_2_0_i64: 1786; EG: ; %bb.0: 1787; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] 1788; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1789; EG-NEXT: 
CF_END 1790; EG-NEXT: PAD 1791; EG-NEXT: ALU clause starting at 4: 1792; EG-NEXT: NOT_INT * T0.W, KC0[2].W, 1793; EG-NEXT: BIT_ALIGN_INT T0.W, literal.x, 0.0, PV.W, 1794; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y, 1795; EG-NEXT: 1610612736(3.689349e+19), 32(4.484155e-44) 1796; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, 0.0, 1797; EG-NEXT: MOV T0.X, 0.0, 1798; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1799; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1800 %shl = shl i64 13835058055282163712, %a 1801 store i64 %shl, i64 addrspace(1)* %out, align 8 1802 ret void 1803} 1804 1805define amdgpu_kernel void @s_shl_inline_imm_4_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { 1806; SI-LABEL: s_shl_inline_imm_4_0_i64: 1807; SI: ; %bb.0: 1808; SI-NEXT: s_load_dword s4, s[0:1], 0xd 1809; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1810; SI-NEXT: s_mov_b32 s3, 0xf000 1811; SI-NEXT: s_mov_b32 s2, -1 1812; SI-NEXT: s_waitcnt lgkmcnt(0) 1813; SI-NEXT: s_lshl_b64 s[4:5], 4.0, s4 1814; SI-NEXT: v_mov_b32_e32 v0, s4 1815; SI-NEXT: v_mov_b32_e32 v1, s5 1816; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1817; SI-NEXT: s_endpgm 1818; 1819; VI-LABEL: s_shl_inline_imm_4_0_i64: 1820; VI: ; %bb.0: 1821; VI-NEXT: s_load_dword s4, s[0:1], 0x34 1822; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1823; VI-NEXT: s_mov_b32 s3, 0xf000 1824; VI-NEXT: s_mov_b32 s2, -1 1825; VI-NEXT: s_waitcnt lgkmcnt(0) 1826; VI-NEXT: s_lshl_b64 s[4:5], 4.0, s4 1827; VI-NEXT: v_mov_b32_e32 v0, s4 1828; VI-NEXT: v_mov_b32_e32 v1, s5 1829; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1830; VI-NEXT: s_endpgm 1831; 1832; EG-LABEL: s_shl_inline_imm_4_0_i64: 1833; EG: ; %bb.0: 1834; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] 1835; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1836; EG-NEXT: CF_END 1837; EG-NEXT: PAD 1838; EG-NEXT: ALU clause starting at 4: 1839; EG-NEXT: NOT_INT * T0.W, KC0[2].W, 1840; EG-NEXT: BIT_ALIGN_INT T0.W, literal.x, 0.0, PV.W, 1841; EG-NEXT: AND_INT * T1.W, KC0[2].W, 
literal.y, 1842; EG-NEXT: 537395200(1.151965e-19), 32(4.484155e-44) 1843; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, 0.0, 1844; EG-NEXT: MOV T0.X, 0.0, 1845; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1846; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1847 %shl = shl i64 4616189618054758400, %a 1848 store i64 %shl, i64 addrspace(1)* %out, align 8 1849 ret void 1850} 1851 1852define amdgpu_kernel void @s_shl_inline_imm_neg_4_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { 1853; SI-LABEL: s_shl_inline_imm_neg_4_0_i64: 1854; SI: ; %bb.0: 1855; SI-NEXT: s_load_dword s4, s[0:1], 0xd 1856; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1857; SI-NEXT: s_mov_b32 s3, 0xf000 1858; SI-NEXT: s_mov_b32 s2, -1 1859; SI-NEXT: s_waitcnt lgkmcnt(0) 1860; SI-NEXT: s_lshl_b64 s[4:5], -4.0, s4 1861; SI-NEXT: v_mov_b32_e32 v0, s4 1862; SI-NEXT: v_mov_b32_e32 v1, s5 1863; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1864; SI-NEXT: s_endpgm 1865; 1866; VI-LABEL: s_shl_inline_imm_neg_4_0_i64: 1867; VI: ; %bb.0: 1868; VI-NEXT: s_load_dword s4, s[0:1], 0x34 1869; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1870; VI-NEXT: s_mov_b32 s3, 0xf000 1871; VI-NEXT: s_mov_b32 s2, -1 1872; VI-NEXT: s_waitcnt lgkmcnt(0) 1873; VI-NEXT: s_lshl_b64 s[4:5], -4.0, s4 1874; VI-NEXT: v_mov_b32_e32 v0, s4 1875; VI-NEXT: v_mov_b32_e32 v1, s5 1876; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1877; VI-NEXT: s_endpgm 1878; 1879; EG-LABEL: s_shl_inline_imm_neg_4_0_i64: 1880; EG: ; %bb.0: 1881; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] 1882; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1883; EG-NEXT: CF_END 1884; EG-NEXT: PAD 1885; EG-NEXT: ALU clause starting at 4: 1886; EG-NEXT: NOT_INT * T0.W, KC0[2].W, 1887; EG-NEXT: BIT_ALIGN_INT T0.W, literal.x, 0.0, PV.W, 1888; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y, 1889; EG-NEXT: 1611137024(3.919933e+19), 32(4.484155e-44) 1890; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, 0.0, 1891; EG-NEXT: MOV T0.X, 0.0, 1892; EG-NEXT: LSHR * T1.X, KC0[2].Y, 
literal.x, 1893; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1894 %shl = shl i64 13839561654909534208, %a 1895 store i64 %shl, i64 addrspace(1)* %out, align 8 1896 ret void 1897} 1898 1899 1900; Test with the 64-bit integer bitpattern for a 32-bit float in the 1901; low 32 bits, which is not a valid 64-bit inline immediate. 1902define amdgpu_kernel void @s_shl_inline_imm_f32_4_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { 1903; SI-LABEL: s_shl_inline_imm_f32_4_0_i64: 1904; SI: ; %bb.0: 1905; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1906; SI-NEXT: s_load_dword s2, s[0:1], 0xd 1907; SI-NEXT: s_mov_b64 s[0:1], 0x40800000 1908; SI-NEXT: s_mov_b32 s7, 0xf000 1909; SI-NEXT: s_mov_b32 s6, -1 1910; SI-NEXT: s_waitcnt lgkmcnt(0) 1911; SI-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 1912; SI-NEXT: v_mov_b32_e32 v0, s0 1913; SI-NEXT: v_mov_b32_e32 v1, s1 1914; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1915; SI-NEXT: s_endpgm 1916; 1917; VI-LABEL: s_shl_inline_imm_f32_4_0_i64: 1918; VI: ; %bb.0: 1919; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 1920; VI-NEXT: s_load_dword s2, s[0:1], 0x34 1921; VI-NEXT: s_mov_b64 s[0:1], 0x40800000 1922; VI-NEXT: s_mov_b32 s7, 0xf000 1923; VI-NEXT: s_mov_b32 s6, -1 1924; VI-NEXT: s_waitcnt lgkmcnt(0) 1925; VI-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 1926; VI-NEXT: v_mov_b32_e32 v0, s0 1927; VI-NEXT: v_mov_b32_e32 v1, s1 1928; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1929; VI-NEXT: s_endpgm 1930; 1931; EG-LABEL: s_shl_inline_imm_f32_4_0_i64: 1932; EG: ; %bb.0: 1933; EG-NEXT: ALU 11, @4, KC0[CB0:0-32], KC1[] 1934; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1935; EG-NEXT: CF_END 1936; EG-NEXT: PAD 1937; EG-NEXT: ALU clause starting at 4: 1938; EG-NEXT: NOT_INT T0.W, KC0[2].W, 1939; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.x, 1940; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 1941; EG-NEXT: LSHL T0.Z, literal.x, PS, 1942; EG-NEXT: BIT_ALIGN_INT T0.W, 0.0, literal.y, PV.W, 1943; EG-NEXT: AND_INT * T1.W, 
KC0[2].W, literal.z, 1944; EG-NEXT: 1082130432(4.000000e+00), 541065216(1.626303e-19) 1945; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) 1946; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, PV.Z, 1947; EG-NEXT: CNDE_INT T0.X, T1.W, T0.Z, 0.0, 1948; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1949; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1950 %shl = shl i64 1082130432, %a 1951 store i64 %shl, i64 addrspace(1)* %out, align 8 1952 ret void 1953} 1954 1955; FIXME: Copy of -1 register (the SI/VI checks below write s1 with s_mov_b32 s1, s6 after s_mov_b32 s6, -1) 1956define amdgpu_kernel void @s_shl_inline_imm_f32_neg_4_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { 1957; SI-LABEL: s_shl_inline_imm_f32_neg_4_0_i64: 1958; SI: ; %bb.0: 1959; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1960; SI-NEXT: s_load_dword s2, s[0:1], 0xd 1961; SI-NEXT: s_mov_b32 s6, -1 1962; SI-NEXT: s_mov_b32 s0, -4.0 1963; SI-NEXT: s_mov_b32 s1, s6 1964; SI-NEXT: s_mov_b32 s7, 0xf000 1965; SI-NEXT: s_waitcnt lgkmcnt(0) 1966; SI-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 1967; SI-NEXT: v_mov_b32_e32 v0, s0 1968; SI-NEXT: v_mov_b32_e32 v1, s1 1969; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1970; SI-NEXT: s_endpgm 1971; 1972; VI-LABEL: s_shl_inline_imm_f32_neg_4_0_i64: 1973; VI: ; %bb.0: 1974; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 1975; VI-NEXT: s_load_dword s2, s[0:1], 0x34 1976; VI-NEXT: s_mov_b32 s6, -1 1977; VI-NEXT: s_mov_b32 s0, -4.0 1978; VI-NEXT: s_mov_b32 s1, s6 1979; VI-NEXT: s_mov_b32 s7, 0xf000 1980; VI-NEXT: s_waitcnt lgkmcnt(0) 1981; VI-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 1982; VI-NEXT: v_mov_b32_e32 v0, s0 1983; VI-NEXT: v_mov_b32_e32 v1, s1 1984; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1985; VI-NEXT: s_endpgm 1986; 1987; EG-LABEL: s_shl_inline_imm_f32_neg_4_0_i64: 1988; EG: ; %bb.0: 1989; EG-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[] 1990; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1991; EG-NEXT: CF_END 1992; EG-NEXT: PAD 1993; EG-NEXT: ALU clause starting at 4: 1994; EG-NEXT: AND_INT T0.Z, KC0[2].W, literal.x, 1995; EG-NEXT: 
MOV T0.W, literal.y, 1996; EG-NEXT: NOT_INT * T1.W, KC0[2].W, 1997; EG-NEXT: 31(4.344025e-44), -532676608(-5.534023e+19) 1998; EG-NEXT: BIT_ALIGN_INT T1.Z, literal.x, PV.W, PS, 1999; EG-NEXT: LSHL T0.W, literal.y, PV.Z, 2000; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.z, 2001; EG-NEXT: 2147483647(nan), -1065353216(-4.000000e+00) 2002; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) 2003; EG-NEXT: CNDE_INT * T0.Y, PS, PV.Z, PV.W, 2004; EG-NEXT: CNDE_INT T0.X, T1.W, T0.W, 0.0, 2005; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 2006; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2007 %shl = shl i64 -1065353216, %a 2008 store i64 %shl, i64 addrspace(1)* %out, align 8 2009 ret void 2010} 2011 2012define amdgpu_kernel void @s_shl_inline_high_imm_f32_4_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { 2013; SI-LABEL: s_shl_inline_high_imm_f32_4_0_i64: 2014; SI: ; %bb.0: 2015; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 2016; SI-NEXT: s_load_dword s2, s[0:1], 0xd 2017; SI-NEXT: s_mov_b32 s0, 0 2018; SI-NEXT: s_mov_b32 s1, 4.0 2019; SI-NEXT: s_mov_b32 s7, 0xf000 2020; SI-NEXT: s_mov_b32 s6, -1 2021; SI-NEXT: s_waitcnt lgkmcnt(0) 2022; SI-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 2023; SI-NEXT: v_mov_b32_e32 v0, s0 2024; SI-NEXT: v_mov_b32_e32 v1, s1 2025; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 2026; SI-NEXT: s_endpgm 2027; 2028; VI-LABEL: s_shl_inline_high_imm_f32_4_0_i64: 2029; VI: ; %bb.0: 2030; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 2031; VI-NEXT: s_load_dword s2, s[0:1], 0x34 2032; VI-NEXT: s_mov_b32 s0, 0 2033; VI-NEXT: s_mov_b32 s1, 4.0 2034; VI-NEXT: s_mov_b32 s7, 0xf000 2035; VI-NEXT: s_mov_b32 s6, -1 2036; VI-NEXT: s_waitcnt lgkmcnt(0) 2037; VI-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 2038; VI-NEXT: v_mov_b32_e32 v0, s0 2039; VI-NEXT: v_mov_b32_e32 v1, s1 2040; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 2041; VI-NEXT: s_endpgm 2042; 2043; EG-LABEL: s_shl_inline_high_imm_f32_4_0_i64: 2044; EG: ; %bb.0: 2045; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], 
KC1[] 2046; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 2047; EG-NEXT: CF_END 2048; EG-NEXT: PAD 2049; EG-NEXT: ALU clause starting at 4: 2050; EG-NEXT: NOT_INT * T0.W, KC0[2].W, 2051; EG-NEXT: BIT_ALIGN_INT T0.W, literal.x, 0.0, PV.W, 2052; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y, 2053; EG-NEXT: 541065216(1.626303e-19), 32(4.484155e-44) 2054; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, 0.0, 2055; EG-NEXT: MOV T0.X, 0.0, 2056; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 2057; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2058 %shl = shl i64 4647714815446351872, %a 2059 store i64 %shl, i64 addrspace(1)* %out, align 8 2060 ret void 2061} 2062 2063define amdgpu_kernel void @s_shl_inline_high_imm_f32_neg_4_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { 2064; SI-LABEL: s_shl_inline_high_imm_f32_neg_4_0_i64: 2065; SI: ; %bb.0: 2066; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 2067; SI-NEXT: s_load_dword s2, s[0:1], 0xd 2068; SI-NEXT: s_mov_b32 s0, 0 2069; SI-NEXT: s_mov_b32 s1, -4.0 2070; SI-NEXT: s_mov_b32 s7, 0xf000 2071; SI-NEXT: s_mov_b32 s6, -1 2072; SI-NEXT: s_waitcnt lgkmcnt(0) 2073; SI-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 2074; SI-NEXT: v_mov_b32_e32 v0, s0 2075; SI-NEXT: v_mov_b32_e32 v1, s1 2076; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 2077; SI-NEXT: s_endpgm 2078; 2079; VI-LABEL: s_shl_inline_high_imm_f32_neg_4_0_i64: 2080; VI: ; %bb.0: 2081; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 2082; VI-NEXT: s_load_dword s2, s[0:1], 0x34 2083; VI-NEXT: s_mov_b32 s0, 0 2084; VI-NEXT: s_mov_b32 s1, -4.0 2085; VI-NEXT: s_mov_b32 s7, 0xf000 2086; VI-NEXT: s_mov_b32 s6, -1 2087; VI-NEXT: s_waitcnt lgkmcnt(0) 2088; VI-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 2089; VI-NEXT: v_mov_b32_e32 v0, s0 2090; VI-NEXT: v_mov_b32_e32 v1, s1 2091; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 2092; VI-NEXT: s_endpgm 2093; 2094; EG-LABEL: s_shl_inline_high_imm_f32_neg_4_0_i64: 2095; EG: ; %bb.0: 2096; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] 2097; 
EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 2098; EG-NEXT: CF_END 2099; EG-NEXT: PAD 2100; EG-NEXT: ALU clause starting at 4: 2101; EG-NEXT: NOT_INT * T0.W, KC0[2].W, 2102; EG-NEXT: BIT_ALIGN_INT T0.W, literal.x, 0.0, PV.W, 2103; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y, 2104; EG-NEXT: 1614807040(5.534023e+19), 32(4.484155e-44) 2105; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, 0.0, 2106; EG-NEXT: MOV T0.X, 0.0, 2107; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 2108; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2109 %shl = shl i64 13871086852301127680, %a 2110 store i64 %shl, i64 addrspace(1)* %out, align 8 2111 ret void 2112} 2113; The IR multiplies %p by 2; the checks below expect a left shift by 1 (s_lshl_b32 on SI/VI, LSHL on EG). 2114define amdgpu_kernel void @test_mul2(i32 %p) { 2115; SI-LABEL: test_mul2: 2116; SI: ; %bb.0: 2117; SI-NEXT: s_load_dword s0, s[0:1], 0x9 2118; SI-NEXT: s_mov_b32 s3, 0xf000 2119; SI-NEXT: s_mov_b32 s2, -1 2120; SI-NEXT: s_waitcnt lgkmcnt(0) 2121; SI-NEXT: s_lshl_b32 s0, s0, 1 2122; SI-NEXT: v_mov_b32_e32 v0, s0 2123; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 2124; SI-NEXT: s_waitcnt vmcnt(0) 2125; SI-NEXT: s_endpgm 2126; 2127; VI-LABEL: test_mul2: 2128; VI: ; %bb.0: 2129; VI-NEXT: s_load_dword s0, s[0:1], 0x24 2130; VI-NEXT: s_mov_b32 s3, 0xf000 2131; VI-NEXT: s_mov_b32 s2, -1 2132; VI-NEXT: s_waitcnt lgkmcnt(0) 2133; VI-NEXT: s_lshl_b32 s0, s0, 1 2134; VI-NEXT: v_mov_b32_e32 v0, s0 2135; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 2136; VI-NEXT: s_waitcnt vmcnt(0) 2137; VI-NEXT: s_endpgm 2138; 2139; EG-LABEL: test_mul2: 2140; EG: ; %bb.0: 2141; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[] 2142; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1 2143; EG-NEXT: CF_END 2144; EG-NEXT: PAD 2145; EG-NEXT: ALU clause starting at 4: 2146; EG-NEXT: MOV T0.X, literal.x, 2147; EG-NEXT: LSHL * T1.X, KC0[2].Y, 1, 2148; EG-NEXT: 0(0.000000e+00), 0(0.000000e+00) 2149 %i = mul i32 %p, 2 2150 store volatile i32 %i, i32 addrspace(1)* undef 2151 ret void 2152} 2153; The IR computes (%in | 1) << 2; the checks below expect the shift by 2 first, followed by an or with 4. 2154define void @shl_or_k(i32 addrspace(1)* %out, i32 %in) { 2155; SI-LABEL: 
shl_or_k: 2156; SI: ; %bb.0: 2157; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2158; SI-NEXT: s_mov_b32 s6, 0 2159; SI-NEXT: v_lshlrev_b32_e32 v2, 2, v2 2160; SI-NEXT: s_mov_b32 s7, 0xf000 2161; SI-NEXT: s_mov_b32 s4, s6 2162; SI-NEXT: s_mov_b32 s5, s6 2163; SI-NEXT: v_or_b32_e32 v2, 4, v2 2164; SI-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64 2165; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 2166; SI-NEXT: s_setpc_b64 s[30:31] 2167; 2168; VI-LABEL: shl_or_k: 2169; VI: ; %bb.0: 2170; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2171; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v2 2172; VI-NEXT: v_or_b32_e32 v2, 4, v2 2173; VI-NEXT: flat_store_dword v[0:1], v2 2174; VI-NEXT: s_waitcnt vmcnt(0) 2175; VI-NEXT: s_setpc_b64 s[30:31] 2176; 2177; EG-LABEL: shl_or_k: 2178; EG: ; %bb.0: 2179; EG-NEXT: ALU 4, @4, KC0[CB0:0-32], KC1[] 2180; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 2181; EG-NEXT: CF_END 2182; EG-NEXT: PAD 2183; EG-NEXT: ALU clause starting at 4: 2184; EG-NEXT: LSHL * T0.W, KC0[2].Z, literal.x, 2185; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2186; EG-NEXT: OR_INT T0.X, PV.W, literal.x, 2187; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 2188; EG-NEXT: 4(5.605194e-45), 2(2.802597e-45) 2189 %tmp0 = or i32 %in, 1 2190 %tmp2 = shl i32 %tmp0, 2 2191 store i32 %tmp2, i32 addrspace(1)* %out 2192 ret void 2193} 2194; Same IR as shl_or_k, but the or result is also stored; the checks below expect the or with 1 first, then a shift of that result by 2. 2195define void @shl_or_k_two_uses(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %in) { 2196; SI-LABEL: shl_or_k_two_uses: 2197; SI: ; %bb.0: 2198; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2199; SI-NEXT: s_mov_b32 s6, 0 2200; SI-NEXT: v_or_b32_e32 v4, 1, v4 2201; SI-NEXT: s_mov_b32 s7, 0xf000 2202; SI-NEXT: s_mov_b32 s4, s6 2203; SI-NEXT: s_mov_b32 s5, s6 2204; SI-NEXT: v_lshlrev_b32_e32 v5, 2, v4 2205; SI-NEXT: buffer_store_dword v5, v[0:1], s[4:7], 0 addr64 2206; SI-NEXT: buffer_store_dword v4, v[2:3], s[4:7], 0 addr64 2207; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 2208; SI-NEXT: s_setpc_b64 s[30:31] 2209; 2210; VI-LABEL: 
shl_or_k_two_uses: 2211; VI: ; %bb.0: 2212; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2213; VI-NEXT: v_or_b32_e32 v4, 1, v4 2214; VI-NEXT: v_lshlrev_b32_e32 v5, 2, v4 2215; VI-NEXT: flat_store_dword v[0:1], v5 2216; VI-NEXT: flat_store_dword v[2:3], v4 2217; VI-NEXT: s_waitcnt vmcnt(0) 2218; VI-NEXT: s_setpc_b64 s[30:31] 2219; 2220; EG-LABEL: shl_or_k_two_uses: 2221; EG: ; %bb.0: 2222; EG-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[] 2223; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0 2224; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1 2225; EG-NEXT: CF_END 2226; EG-NEXT: ALU clause starting at 4: 2227; EG-NEXT: LSHR T0.X, KC0[2].Z, literal.x, 2228; EG-NEXT: OR_INT * T1.X, KC0[2].W, 1, 2229; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2230; EG-NEXT: LSHL T2.X, PS, literal.x, 2231; EG-NEXT: LSHR * T3.X, KC0[2].Y, literal.x, 2232; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2233 %tmp0 = or i32 %in, 1 2234 %tmp2 = shl i32 %tmp0, 2 2235 store i32 %tmp2, i32 addrspace(1)* %out0 2236 store i32 %tmp0, i32 addrspace(1)* %out1 2237 ret void 2238} 2239 2240attributes #0 = { nounwind readnone } 2241