1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -amdgpu-scalarize-global-loads=false -march=amdgcn -mtriple=amdgcn-- -mcpu=verde -verify-machineinstrs | FileCheck %s --check-prefixes=SI 3; RUN: llc < %s -march=amdgcn -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s -check-prefixes=VI 4; RUN: llc < %s -amdgpu-scalarize-global-loads=false -march=r600 -mtriple=r600-- -mcpu=redwood -verify-machineinstrs | FileCheck %s --check-prefixes=EG 5 6declare i32 @llvm.amdgcn.workitem.id.x() #0 7 8declare i32 @llvm.amdgcn.workgroup.id.x() #0 9 10define amdgpu_kernel void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) { 11; SI-LABEL: shl_v2i32: 12; SI: ; %bb.0: 13; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 14; SI-NEXT: s_mov_b32 s7, 0xf000 15; SI-NEXT: s_mov_b32 s6, -1 16; SI-NEXT: s_mov_b32 s10, s6 17; SI-NEXT: s_mov_b32 s11, s7 18; SI-NEXT: s_waitcnt lgkmcnt(0) 19; SI-NEXT: s_mov_b32 s8, s2 20; SI-NEXT: s_mov_b32 s9, s3 21; SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 22; SI-NEXT: s_mov_b32 s4, s0 23; SI-NEXT: s_mov_b32 s5, s1 24; SI-NEXT: s_waitcnt vmcnt(0) 25; SI-NEXT: v_lshl_b32_e32 v1, v1, v3 26; SI-NEXT: v_lshl_b32_e32 v0, v0, v2 27; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 28; SI-NEXT: s_endpgm 29; 30; VI-LABEL: shl_v2i32: 31; VI: ; %bb.0: 32; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 33; VI-NEXT: s_waitcnt lgkmcnt(0) 34; VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 35; VI-NEXT: s_mov_b32 s3, 0xf000 36; VI-NEXT: s_mov_b32 s2, -1 37; VI-NEXT: s_waitcnt lgkmcnt(0) 38; VI-NEXT: s_lshl_b32 s5, s5, s7 39; VI-NEXT: s_lshl_b32 s4, s4, s6 40; VI-NEXT: v_mov_b32_e32 v0, s4 41; VI-NEXT: v_mov_b32_e32 v1, s5 42; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 43; VI-NEXT: s_endpgm 44; 45; EG-LABEL: shl_v2i32: 46; EG: ; %bb.0: 47; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 48; EG-NEXT: TEX 0 @6 49; EG-NEXT: ALU 3, @9, KC0[CB0:0-32], KC1[] 50; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 51; EG-NEXT: CF_END 52; EG-NEXT: PAD 53; EG-NEXT: Fetch clause starting at 6: 54; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1 55; EG-NEXT: ALU clause starting at 8: 56; EG-NEXT: MOV * T0.X, KC0[2].Z, 57; EG-NEXT: ALU clause starting at 9: 58; EG-NEXT: LSHL * T0.Y, T0.Y, T0.W, 59; EG-NEXT: LSHL T0.X, T0.X, T0.Z, 60; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 61; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 62 %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1 63 %a = load <2 x i32>, <2 x i32> addrspace(1)* %in 64 %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr 65 %result = shl <2 x i32> %a, %b 66 store <2 x i32> %result, <2 x i32> addrspace(1)* %out 67 ret void 68} 69 70define amdgpu_kernel void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) { 71; SI-LABEL: shl_v4i32: 72; SI: ; %bb.0: 73; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 74; SI-NEXT: s_mov_b32 s7, 0xf000 75; SI-NEXT: s_mov_b32 s6, -1 76; SI-NEXT: s_mov_b32 s10, s6 77; SI-NEXT: s_mov_b32 s11, s7 78; SI-NEXT: s_waitcnt lgkmcnt(0) 79; SI-NEXT: s_mov_b32 s8, s2 80; SI-NEXT: s_mov_b32 s9, s3 81; SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 82; SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 83; SI-NEXT: s_mov_b32 s4, s0 84; SI-NEXT: s_mov_b32 s5, s1 85; SI-NEXT: s_waitcnt vmcnt(0) 86; SI-NEXT: v_lshl_b32_e32 v3, v3, v7 87; SI-NEXT: v_lshl_b32_e32 v2, v2, v6 88; SI-NEXT: v_lshl_b32_e32 v1, v1, v5 89; SI-NEXT: v_lshl_b32_e32 v0, v0, v4 90; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 91; SI-NEXT: s_endpgm 92; 93; VI-LABEL: shl_v4i32: 94; VI: ; %bb.0: 95; VI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x24 96; VI-NEXT: s_waitcnt lgkmcnt(0) 97; VI-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0 98; VI-NEXT: s_mov_b32 s11, 0xf000 99; VI-NEXT: s_mov_b32 s10, -1 100; VI-NEXT: s_waitcnt lgkmcnt(0) 101; VI-NEXT: s_lshl_b32 s3, s3, s7 102; VI-NEXT: s_lshl_b32 s2, s2, s6 103; VI-NEXT: s_lshl_b32 s1, s1, s5 104; VI-NEXT: s_lshl_b32 s0, s0, s4 105; VI-NEXT: v_mov_b32_e32 v0, s0 106; VI-NEXT: v_mov_b32_e32 v1, s1 107; VI-NEXT: v_mov_b32_e32 v2, s2 108; VI-NEXT: v_mov_b32_e32 v3, s3 109; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 110; VI-NEXT: s_endpgm 111; 112; EG-LABEL: shl_v4i32: 113; EG: ; %bb.0: 114; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] 115; EG-NEXT: TEX 1 @6 116; EG-NEXT: ALU 5, @11, KC0[CB0:0-32], KC1[] 117; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1 118; EG-NEXT: CF_END 119; EG-NEXT: PAD 120; EG-NEXT: Fetch clause starting at 6: 121; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 16, #1 122; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1 123; EG-NEXT: ALU clause starting at 10: 124; EG-NEXT: MOV * T0.X, KC0[2].Z, 125; EG-NEXT: ALU clause starting at 11: 126; EG-NEXT: LSHL * T0.W, T0.W, T1.W, 127; EG-NEXT: LSHL * T0.Z, T0.Z, T1.Z, 128; EG-NEXT: LSHL * T0.Y, T0.Y, T1.Y, 129; EG-NEXT: LSHL T0.X, T0.X, T1.X, 130; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 131; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 132 %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1 133 %a = load <4 x i32>, <4 x i32> addrspace(1)* %in 134 %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr 135 %result = shl <4 x i32> %a, %b 136 store <4 x i32> %result, <4 x i32> addrspace(1)* %out 137 ret void 138} 139 140define amdgpu_kernel void @shl_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) { 141; SI-LABEL: shl_i16: 142; SI: ; %bb.0: 143; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 144; SI-NEXT: s_mov_b32 s7, 0xf000 145; SI-NEXT: s_mov_b32 s6, -1 146; SI-NEXT: s_mov_b32 s10, s6 147; SI-NEXT: s_mov_b32 s11, s7 148; SI-NEXT: s_waitcnt lgkmcnt(0) 149; SI-NEXT: s_mov_b32 s8, s2 150; SI-NEXT: s_mov_b32 s9, s3 151; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 152; SI-NEXT: buffer_load_ushort v1, off, s[8:11], 0 offset:2 153; SI-NEXT: s_mov_b32 s4, s0 154; SI-NEXT: s_mov_b32 s5, s1 155; SI-NEXT: s_waitcnt vmcnt(0) 156; SI-NEXT: v_lshlrev_b32_e32 v0, v1, v0 157; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 158; SI-NEXT: s_endpgm 159; 160; VI-LABEL: shl_i16: 161; VI: ; %bb.0: 162; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 163; VI-NEXT: s_mov_b32 s7, 0xf000 164; VI-NEXT: s_mov_b32 s6, -1 165; VI-NEXT: s_mov_b32 s10, s6 166; VI-NEXT: s_mov_b32 s11, s7 167; VI-NEXT: s_waitcnt lgkmcnt(0) 168; VI-NEXT: s_mov_b32 s8, s2 169; VI-NEXT: s_mov_b32 s9, s3 170; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 171; VI-NEXT: buffer_load_ushort v1, off, s[8:11], 0 offset:2 172; VI-NEXT: s_mov_b32 s4, s0 173; VI-NEXT: s_mov_b32 s5, s1 174; VI-NEXT: s_waitcnt vmcnt(0) 175; VI-NEXT: v_lshlrev_b32_e32 v0, v1, v0 176; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 177; VI-NEXT: s_endpgm 178; 179; EG-LABEL: shl_i16: 180; EG: ; %bb.0: 181; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] 182; EG-NEXT: TEX 1 @6 183; EG-NEXT: ALU 12, @11, KC0[CB0:0-32], KC1[] 184; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X 185; EG-NEXT: CF_END 186; EG-NEXT: PAD 187; EG-NEXT: Fetch clause starting at 6: 188; EG-NEXT: VTX_READ_16 T1.X, T0.X, 2, #1 189; EG-NEXT: VTX_READ_16 T0.X, T0.X, 0, #1 190; EG-NEXT: ALU clause starting at 10: 191; EG-NEXT: MOV * T0.X, KC0[2].Z, 192; EG-NEXT: ALU clause starting at 11: 193; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x, 194; EG-NEXT: LSHL * T1.W, T0.X, T1.X, 195; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) 196; EG-NEXT: AND_INT T1.W, PS, literal.x, 197; EG-NEXT: LSHL * T0.W, PV.W, literal.y, 198; EG-NEXT: 65535(9.183409e-41), 3(4.203895e-45) 199; EG-NEXT: LSHL T0.X, PV.W, PS, 200; EG-NEXT: LSHL * T0.W, literal.x, PS, 201; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 202; EG-NEXT: MOV T0.Y, 0.0, 203; EG-NEXT: MOV * T0.Z, 0.0, 204; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 205; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 206 %b_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1 207 %a = load i16, i16 addrspace(1)* %in 208 %b = load i16, i16 addrspace(1)* %b_ptr 209 %result = shl i16 %a, %b 210 store i16 %result, i16 addrspace(1)* %out 211 ret void 212} 213 214define amdgpu_kernel void @shl_i16_v_s(i16 addrspace(1)* %out, i16 addrspace(1)* %in, i16 %b) { 215; SI-LABEL: shl_i16_v_s: 216; SI: ; %bb.0: 217; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 218; SI-NEXT: s_load_dword s12, s[0:1], 0xd 219; SI-NEXT: s_mov_b32 s3, 0xf000 220; SI-NEXT: s_mov_b32 s2, -1 221; SI-NEXT: s_mov_b32 s10, s2 222; SI-NEXT: s_waitcnt lgkmcnt(0) 223; SI-NEXT: s_mov_b32 s8, s6 224; SI-NEXT: s_mov_b32 s9, s7 225; SI-NEXT: s_mov_b32 s11, s3 226; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 227; SI-NEXT: s_mov_b32 s0, s4 228; SI-NEXT: s_mov_b32 s1, s5 229; SI-NEXT: s_waitcnt vmcnt(0) 230; SI-NEXT: v_lshlrev_b32_e32 v0, s12, v0 231; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 232; SI-NEXT: s_endpgm 233; 234; VI-LABEL: shl_i16_v_s: 235; VI: ; %bb.0: 236; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 237; VI-NEXT: s_load_dword s12, s[0:1], 0x34 238; VI-NEXT: s_mov_b32 s3, 0xf000 239; VI-NEXT: s_mov_b32 s2, -1 240; VI-NEXT: s_mov_b32 s10, s2 241; VI-NEXT: s_waitcnt lgkmcnt(0) 242; VI-NEXT: s_mov_b32 s8, s6 243; VI-NEXT: s_mov_b32 s9, s7 244; VI-NEXT: s_mov_b32 s11, s3 245; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 246; VI-NEXT: s_mov_b32 s0, s4 247; VI-NEXT: s_mov_b32 s1, s5 248; VI-NEXT: s_waitcnt vmcnt(0) 249; VI-NEXT: v_lshlrev_b32_e32 v0, s12, v0 250; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 251; VI-NEXT: s_endpgm 252; 253; EG-LABEL: shl_i16_v_s: 254; EG: ; %bb.0: 255; EG-NEXT: ALU 1, @10, KC0[CB0:0-32], KC1[] 256; EG-NEXT: TEX 1 @6 257; EG-NEXT: ALU 12, @12, KC0[CB0:0-32], KC1[] 258; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X 259; EG-NEXT: CF_END 260; EG-NEXT: PAD 261; EG-NEXT: Fetch clause starting at 6: 262; EG-NEXT: VTX_READ_16 T1.X, T1.X, 0, #1 263; EG-NEXT: VTX_READ_16 T0.X, T0.X, 44, #3 264; EG-NEXT: ALU clause starting at 10: 265; EG-NEXT: MOV T0.X, 0.0, 266; EG-NEXT: MOV * T1.X, KC0[2].Z, 267; EG-NEXT: ALU clause starting at 12: 268; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x, 269; EG-NEXT: LSHL * T1.W, T1.X, T0.X, 270; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) 271; EG-NEXT: AND_INT T1.W, PS, literal.x, 272; EG-NEXT: LSHL * T0.W, PV.W, literal.y, 273; EG-NEXT: 65535(9.183409e-41), 3(4.203895e-45) 274; EG-NEXT: LSHL T0.X, PV.W, PS, 275; EG-NEXT: LSHL * T0.W, literal.x, PS, 276; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 277; EG-NEXT: MOV T0.Y, 0.0, 278; EG-NEXT: MOV * T0.Z, 0.0, 279; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 280; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 281 %a = load i16, i16 addrspace(1)* %in 282 %result = shl i16 %a, %b 283 store i16 %result, i16 addrspace(1)* %out 284 ret void 285} 286 287define amdgpu_kernel void @shl_i16_v_compute_s(i16 addrspace(1)* %out, i16 addrspace(1)* %in, i16 %b) { 288; SI-LABEL: shl_i16_v_compute_s: 289; SI: ; %bb.0: 290; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 291; SI-NEXT: s_load_dword s12, s[0:1], 0xd 292; SI-NEXT: s_mov_b32 s3, 0xf000 293; SI-NEXT: s_mov_b32 s2, -1 294; SI-NEXT: s_mov_b32 s10, s2 295; SI-NEXT: s_waitcnt lgkmcnt(0) 296; SI-NEXT: s_mov_b32 s8, s6 297; SI-NEXT: s_mov_b32 s9, s7 298; SI-NEXT: s_mov_b32 s11, s3 299; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 300; SI-NEXT: s_add_i32 s12, s12, 3 301; SI-NEXT: s_mov_b32 s0, s4 302; SI-NEXT: s_mov_b32 s1, s5 303; SI-NEXT: s_waitcnt vmcnt(0) 304; SI-NEXT: v_lshlrev_b32_e32 v0, s12, v0 305; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 306; SI-NEXT: s_endpgm 307; 308; VI-LABEL: shl_i16_v_compute_s: 309; VI: ; %bb.0: 310; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 311; VI-NEXT: s_load_dword s12, s[0:1], 0x34 312; VI-NEXT: s_mov_b32 s3, 0xf000 313; VI-NEXT: s_mov_b32 s2, -1 314; VI-NEXT: s_mov_b32 s10, s2 315; VI-NEXT: s_waitcnt lgkmcnt(0) 316; VI-NEXT: s_mov_b32 s8, s6 317; VI-NEXT: s_mov_b32 s9, s7 318; VI-NEXT: s_mov_b32 s11, s3 319; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 320; VI-NEXT: s_add_i32 s12, s12, 3 321; VI-NEXT: s_mov_b32 s0, s4 322; VI-NEXT: s_mov_b32 s1, s5 323; VI-NEXT: s_waitcnt vmcnt(0) 324; VI-NEXT: v_lshlrev_b32_e32 v0, s12, v0 325; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 326; VI-NEXT: s_endpgm 327; 328; EG-LABEL: shl_i16_v_compute_s: 329; EG: ; %bb.0: 330; EG-NEXT: ALU 0, @12, KC0[], KC1[] 331; EG-NEXT: TEX 0 @8 332; EG-NEXT: ALU 0, @13, KC0[CB0:0-32], KC1[] 333; EG-NEXT: TEX 0 @10 334; EG-NEXT: ALU 15, @14, KC0[CB0:0-32], KC1[] 335; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X 336; EG-NEXT: CF_END 337; EG-NEXT: PAD 338; EG-NEXT: Fetch clause starting at 8: 339; EG-NEXT: VTX_READ_16 T0.X, T0.X, 44, #3 340; EG-NEXT: Fetch clause starting at 10: 341; EG-NEXT: VTX_READ_16 T1.X, T1.X, 0, #1 342; EG-NEXT: ALU clause starting at 12: 343; EG-NEXT: MOV * T0.X, 0.0, 344; EG-NEXT: ALU clause starting at 13: 345; EG-NEXT: MOV * T1.X, KC0[2].Z, 346; EG-NEXT: ALU clause starting at 14: 347; EG-NEXT: ADD_INT * T0.W, T0.X, literal.x, 348; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) 349; EG-NEXT: AND_INT T0.W, PV.W, literal.x, 350; EG-NEXT: AND_INT * T1.W, KC0[2].Y, literal.y, 351; EG-NEXT: 65535(9.183409e-41), 3(4.203895e-45) 352; EG-NEXT: LSHL * T0.W, T1.X, PV.W, 353; EG-NEXT: AND_INT T0.W, PV.W, literal.x, 354; EG-NEXT: LSHL * T1.W, T1.W, literal.y, 355; EG-NEXT: 65535(9.183409e-41), 3(4.203895e-45) 356; EG-NEXT: LSHL T0.X, PV.W, PS, 357; EG-NEXT: LSHL * T0.W, literal.x, PS, 358; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 359; EG-NEXT: MOV T0.Y, 0.0, 360; EG-NEXT: MOV * T0.Z, 0.0, 361; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 362; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 363 %a = load i16, i16 addrspace(1)* %in 364 %b.add = add i16 %b, 3 365 %result = shl i16 %a, %b.add 366 store i16 %result, i16 addrspace(1)* %out 367 ret void 368} 369 370define amdgpu_kernel void @shl_i16_computed_amount(i16 addrspace(1)* %out, i16 addrspace(1)* %in) { 371; SI-LABEL: shl_i16_computed_amount: 372; SI: ; %bb.0: 373; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 374; SI-NEXT: s_mov_b32 s7, 0xf000 375; SI-NEXT: s_mov_b32 s6, -1 376; SI-NEXT: s_mov_b32 s10, s6 377; SI-NEXT: s_mov_b32 s11, s7 378; SI-NEXT: s_waitcnt lgkmcnt(0) 379; SI-NEXT: s_mov_b32 s8, s2 380; SI-NEXT: s_mov_b32 s9, s3 381; SI-NEXT: v_lshlrev_b32_e32 v0, 1, v0 382; SI-NEXT: v_mov_b32_e32 v1, 0 383; SI-NEXT: s_mov_b32 s14, 0 384; SI-NEXT: s_mov_b32 s15, s7 385; SI-NEXT: s_mov_b64 s[12:13], s[2:3] 386; SI-NEXT: buffer_load_ushort v2, off, s[8:11], 0 glc 387; SI-NEXT: s_waitcnt vmcnt(0) 388; SI-NEXT: buffer_load_ushort v0, v[0:1], s[12:15], 0 addr64 offset:2 glc 389; SI-NEXT: s_waitcnt vmcnt(0) 390; SI-NEXT: s_mov_b32 s4, s0 391; SI-NEXT: s_mov_b32 s5, s1 392; SI-NEXT: v_add_i32_e32 v0, vcc, 3, v0 393; SI-NEXT: v_lshlrev_b32_e32 v0, v0, v2 394; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 395; SI-NEXT: s_endpgm 396; 397; VI-LABEL: shl_i16_computed_amount: 398; VI: ; %bb.0: 399; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 400; VI-NEXT: v_lshlrev_b32_e32 v0, 1, v0 401; VI-NEXT: s_mov_b32 s7, 0xf000 402; VI-NEXT: s_mov_b32 s6, -1 403; VI-NEXT: s_mov_b32 s10, s6 404; VI-NEXT: s_waitcnt lgkmcnt(0) 405; VI-NEXT: v_mov_b32_e32 v1, s3 406; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0 407; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 408; VI-NEXT: v_add_u32_e32 v0, vcc, 2, v0 409; VI-NEXT: s_mov_b32 s8, s2 410; VI-NEXT: s_mov_b32 s9, s3 411; VI-NEXT: s_mov_b32 s11, s7 412; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 413; VI-NEXT: buffer_load_ushort v2, off, s[8:11], 0 glc 414; VI-NEXT: s_waitcnt vmcnt(0) 415; VI-NEXT: flat_load_ushort v0, v[0:1] glc 416; VI-NEXT: s_waitcnt vmcnt(0) 417; VI-NEXT: s_mov_b32 s4, s0 418; VI-NEXT: s_mov_b32 s5, s1 419; VI-NEXT: v_add_u16_e32 v0, 3, v0 420; VI-NEXT: v_lshlrev_b16_e32 v0, v0, v2 421; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 422; VI-NEXT: s_endpgm 423; 424; EG-LABEL: shl_i16_computed_amount: 425; EG: ; %bb.0: 426; EG-NEXT: ALU 0, @12, KC0[CB0:0-32], KC1[] 427; EG-NEXT: TEX 0 @8 428; EG-NEXT: ALU 1, @13, KC0[CB0:0-32], KC1[] 429; EG-NEXT: TEX 0 @10 430; EG-NEXT: ALU 15, @15, KC0[CB0:0-32], KC1[] 431; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X 432; EG-NEXT: CF_END 433; EG-NEXT: PAD 434; EG-NEXT: Fetch clause starting at 8: 435; EG-NEXT: VTX_READ_16 T1.X, T1.X, 0, #1 436; EG-NEXT: Fetch clause starting at 10: 437; EG-NEXT: VTX_READ_16 T0.X, T0.X, 2, #1 438; EG-NEXT: ALU clause starting at 12: 439; EG-NEXT: MOV * T1.X, KC0[2].Z, 440; EG-NEXT: ALU clause starting at 13: 441; EG-NEXT: LSHL * T0.W, T0.X, 1, 442; EG-NEXT: ADD_INT * T0.X, KC0[2].Z, PV.W, 443; EG-NEXT: ALU clause starting at 15: 444; EG-NEXT: ADD_INT * T0.W, T0.X, literal.x, 445; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) 446; EG-NEXT: AND_INT T0.W, PV.W, literal.x, 447; EG-NEXT: AND_INT * T1.W, KC0[2].Y, literal.y, 448; EG-NEXT: 65535(9.183409e-41), 3(4.203895e-45) 449; EG-NEXT: LSHL * T0.W, T1.X, PV.W, 450; EG-NEXT: AND_INT T0.W, PV.W, literal.x, 451; EG-NEXT: LSHL * T1.W, T1.W, literal.y, 452; EG-NEXT: 65535(9.183409e-41), 3(4.203895e-45) 453; EG-NEXT: LSHL T0.X, PV.W, PS, 454; EG-NEXT: LSHL * T0.W, literal.x, PS, 455; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 456; EG-NEXT: MOV T0.Y, 0.0, 457; EG-NEXT: MOV * T0.Z, 0.0, 458; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 459; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 460 %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 461 %gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i32 %tid 462 %gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid 463 %b_ptr = getelementptr i16, i16 addrspace(1)* %gep, i16 1 464 %a = load volatile i16, i16 addrspace(1)* %in 465 %b = load volatile i16, i16 addrspace(1)* %b_ptr 466 %b.add = add i16 %b, 3 467 %result = shl i16 %a, %b.add 468 store i16 %result, i16 addrspace(1)* %out 469 ret void 470} 471 472define amdgpu_kernel void @shl_i16_i_s(i16 addrspace(1)* %out, i16 zeroext %a) { 473; SI-LABEL: shl_i16_i_s: 474; SI: ; %bb.0: 475; SI-NEXT: s_load_dword s4, s[0:1], 0xb 476; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 477; SI-NEXT: s_mov_b32 s3, 0xf000 478; SI-NEXT: s_mov_b32 s2, -1 479; SI-NEXT: s_waitcnt lgkmcnt(0) 480; SI-NEXT: s_lshl_b32 s4, s4, 12 481; SI-NEXT: v_mov_b32_e32 v0, s4 482; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 483; SI-NEXT: s_endpgm 484; 485; VI-LABEL: shl_i16_i_s: 486; VI: ; %bb.0: 487; VI-NEXT: s_load_dword s4, s[0:1], 0x2c 488; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 489; VI-NEXT: s_mov_b32 s3, 0xf000 490; VI-NEXT: s_mov_b32 s2, -1 491; VI-NEXT: s_waitcnt lgkmcnt(0) 492; VI-NEXT: s_lshl_b32 s4, s4, 12 493; VI-NEXT: v_mov_b32_e32 v0, s4 494; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 495; VI-NEXT: s_endpgm 496; 497; EG-LABEL: shl_i16_i_s: 498; EG: ; %bb.0: 499; EG-NEXT: ALU 0, @8, KC0[], KC1[] 500; EG-NEXT: TEX 0 @6 501; EG-NEXT: ALU 14, @9, KC0[CB0:0-32], KC1[] 502; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X 503; EG-NEXT: CF_END 504; EG-NEXT: PAD 505; EG-NEXT: Fetch clause starting at 6: 506; EG-NEXT: VTX_READ_16 T0.X, T0.X, 40, #3 507; EG-NEXT: ALU clause starting at 8: 508; EG-NEXT: MOV * T0.X, 0.0, 509; EG-NEXT: ALU clause starting at 9: 510; EG-NEXT: BFE_INT T0.W, T0.X, 0.0, literal.x, 511; EG-NEXT: AND_INT * T1.W, KC0[2].Y, literal.y, 512; EG-NEXT: 16(2.242078e-44), 3(4.203895e-45) 513; EG-NEXT: LSHL * T0.W, PV.W, literal.x, 514; EG-NEXT: 12(1.681558e-44), 0(0.000000e+00) 515; EG-NEXT: AND_INT T0.W, PV.W, literal.x, 516; EG-NEXT: LSHL * T1.W, T1.W, literal.y, 517; EG-NEXT: 61440(8.609578e-41), 3(4.203895e-45) 518; EG-NEXT: LSHL T0.X, PV.W, PS, 519; EG-NEXT: LSHL * T0.W, literal.x, PS, 520; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 521; EG-NEXT: MOV T0.Y, 0.0, 522; EG-NEXT: MOV * T0.Z, 0.0, 523; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 524; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 525 %result = shl i16 %a, 12 526 store i16 %result, i16 addrspace(1)* %out 527 ret void 528} 529 530define amdgpu_kernel void @shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) { 531; SI-LABEL: shl_v2i16: 532; SI: ; %bb.0: 533; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 534; SI-NEXT: s_mov_b32 s7, 0xf000 535; SI-NEXT: s_mov_b32 s6, -1 536; SI-NEXT: s_mov_b32 s10, s6 537; SI-NEXT: s_mov_b32 s11, s7 538; SI-NEXT: s_waitcnt lgkmcnt(0) 539; SI-NEXT: s_mov_b32 s8, s2 540; SI-NEXT: s_mov_b32 s9, s3 541; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 542; SI-NEXT: v_mov_b32_e32 v1, 0 543; SI-NEXT: s_mov_b32 s14, 0 544; SI-NEXT: s_mov_b32 s15, s7 545; SI-NEXT: s_mov_b64 s[12:13], s[2:3] 546; SI-NEXT: buffer_load_dword v2, off, s[8:11], 0 547; SI-NEXT: buffer_load_dword v0, v[0:1], s[12:15], 0 addr64 offset:4 548; SI-NEXT: s_mov_b32 s4, s0 549; SI-NEXT: s_mov_b32 s5, s1 550; SI-NEXT: s_waitcnt vmcnt(1) 551; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v2 552; SI-NEXT: s_waitcnt vmcnt(0) 553; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v0 554; SI-NEXT: v_lshlrev_b32_e32 v0, v0, v2 555; SI-NEXT: v_lshlrev_b32_e32 v1, v3, v1 556; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0 557; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 558; SI-NEXT: v_or_b32_e32 v0, v0, v1 559; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 560; SI-NEXT: s_endpgm 561; 562; VI-LABEL: shl_v2i16: 563; VI: ; %bb.0: 564; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 565; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 566; VI-NEXT: s_waitcnt lgkmcnt(0) 567; VI-NEXT: v_mov_b32_e32 v1, s3 568; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0 569; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 570; VI-NEXT: v_add_u32_e32 v0, vcc, 4, v0 571; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 572; VI-NEXT: flat_load_dword v0, v[0:1] 573; VI-NEXT: s_load_dword s4, s[2:3], 0x0 574; VI-NEXT: s_mov_b32 s3, 0xf000 575; VI-NEXT: s_mov_b32 s2, -1 576; VI-NEXT: s_waitcnt lgkmcnt(0) 577; VI-NEXT: s_lshr_b32 s5, s4, 16 578; VI-NEXT: v_mov_b32_e32 v1, s5 579; VI-NEXT: s_waitcnt vmcnt(0) 580; VI-NEXT: v_lshlrev_b16_e64 v2, v0, s4 581; VI-NEXT: v_lshlrev_b16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 582; VI-NEXT: v_or_b32_e32 v0, v2, v0 583; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 584; VI-NEXT: s_endpgm 585; 586; EG-LABEL: shl_v2i16: 587; EG: ; %bb.0: 588; EG-NEXT: ALU 2, @12, KC0[CB0:0-32], KC1[] 589; EG-NEXT: TEX 0 @8 590; EG-NEXT: ALU 0, @15, KC0[CB0:0-32], KC1[] 591; EG-NEXT: TEX 0 @10 592; EG-NEXT: ALU 12, @16, KC0[CB0:0-32], KC1[] 593; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T7.X, 1 594; EG-NEXT: CF_END 595; EG-NEXT: PAD 596; EG-NEXT: Fetch clause starting at 8: 597; EG-NEXT: VTX_READ_32 T0.X, T0.X, 4, #1 598; EG-NEXT: Fetch clause starting at 10: 599; EG-NEXT: VTX_READ_32 T7.X, T7.X, 0, #1 600; EG-NEXT: ALU clause starting at 12: 601; EG-NEXT: LSHL * T0.W, T0.X, literal.x, 602; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 603; EG-NEXT: ADD_INT * T0.X, KC0[2].Z, PV.W, 604; EG-NEXT: ALU clause starting at 15: 605; EG-NEXT: MOV * T7.X, KC0[2].Z, 606; EG-NEXT: ALU clause starting at 16: 607; EG-NEXT: AND_INT T0.Y, T0.X, literal.x, 608; EG-NEXT: AND_INT T0.Z, T7.X, literal.x, BS:VEC_120/SCL_212 609; EG-NEXT: LSHR T0.W, T0.X, literal.y, 610; EG-NEXT: LSHR * T1.W, T7.X, literal.y, 611; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 612; EG-NEXT: LSHL T0.W, PS, PV.W, 613; EG-NEXT: LSHL * T1.W, PV.Z, PV.Y, 614; EG-NEXT: AND_INT T1.W, PS, literal.x, 615; EG-NEXT: LSHL * T0.W, PV.W, literal.y, 616; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 617; EG-NEXT: OR_INT T0.X, PV.W, PS, 618; EG-NEXT: LSHR * T7.X, KC0[2].Y, literal.x, 619; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 620 %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 621 %gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i32 %tid 622 %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid 623 %b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %gep, i16 1 624 %a = load <2 x i16>, <2 x i16> addrspace(1)* %in 625 %b = load <2 x i16>, <2 x i16> addrspace(1)* %b_ptr 626 %result = shl <2 x i16> %a, %b 627 store <2 x i16> %result, <2 x i16> addrspace(1)* %out 628 ret void 629} 630 631define amdgpu_kernel void @shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) { 632; SI-LABEL: shl_v4i16: 633; SI: ; %bb.0: 634; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 635; SI-NEXT: s_mov_b32 s7, 0xf000 636; SI-NEXT: s_mov_b32 s6, 0 637; SI-NEXT: v_lshlrev_b32_e32 v4, 3, v0 638; SI-NEXT: v_mov_b32_e32 v5, 0 639; SI-NEXT: s_waitcnt lgkmcnt(0) 640; SI-NEXT: s_mov_b64 s[4:5], s[2:3] 641; SI-NEXT: buffer_load_dwordx4 v[0:3], v[4:5], s[4:7], 0 addr64 642; SI-NEXT: s_mov_b32 s4, 0xffff 643; SI-NEXT: s_mov_b64 s[2:3], s[6:7] 644; SI-NEXT: s_waitcnt vmcnt(0) 645; SI-NEXT: v_lshrrev_b32_e32 v6, 16, v0 646; SI-NEXT: v_lshrrev_b32_e32 v7, 16, v1 647; SI-NEXT: v_lshrrev_b32_e32 v8, 16, v2 648; SI-NEXT: v_lshrrev_b32_e32 v9, 16, v3 649; SI-NEXT: v_lshlrev_b32_e32 v1, v3, v1 650; SI-NEXT: v_lshlrev_b32_e32 v0, v2, v0 651; SI-NEXT: v_lshlrev_b32_e32 v2, v9, v7 652; SI-NEXT: v_lshlrev_b32_e32 v3, v8, v6 653; SI-NEXT: v_and_b32_e32 v1, s4, v1 654; SI-NEXT: v_and_b32_e32 v0, s4, v0 655; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 656; SI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 657; SI-NEXT: v_or_b32_e32 v1, v1, v2 658; SI-NEXT: v_or_b32_e32 v0, v0, v3 659; SI-NEXT: buffer_store_dwordx2 v[0:1], v[4:5], s[0:3], 0 addr64 660; SI-NEXT: s_endpgm 661; 662; VI-LABEL: shl_v4i16: 663; VI: ; %bb.0: 664; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 665; VI-NEXT: v_lshlrev_b32_e32 v4, 3, v0 666; VI-NEXT: s_waitcnt lgkmcnt(0) 667; VI-NEXT: v_mov_b32_e32 v1, s3 668; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v4 669; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc 670; VI-NEXT: flat_load_dwordx4 v[0:3], v[0:1] 671; VI-NEXT: v_mov_b32_e32 v5, s1 672; VI-NEXT: v_add_u32_e32 v4, vcc, s0, v4 673; VI-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc 674; VI-NEXT: s_waitcnt vmcnt(0) 675; VI-NEXT: v_lshlrev_b16_e32 v6, v3, v1 676; VI-NEXT: v_lshlrev_b16_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 677; VI-NEXT: v_lshlrev_b16_e32 v3, v2, v0 678; VI-NEXT: v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 679; VI-NEXT: v_or_b32_e32 v1, v6, v1 680; VI-NEXT: v_or_b32_e32 v0, v3, v0 681; VI-NEXT: flat_store_dwordx2 v[4:5], v[0:1] 682; VI-NEXT: s_endpgm 683; 684; EG-LABEL: shl_v4i16: 685; EG: ; %bb.0: 686; EG-NEXT: ALU 2, @8, KC0[CB0:0-32], KC1[] 687; EG-NEXT: TEX 0 @6 688; EG-NEXT: ALU 53, @11, KC0[CB0:0-32], KC1[] 689; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T10.XY, T0.X, 1 690; EG-NEXT: CF_END 691; EG-NEXT: PAD 692; EG-NEXT: Fetch clause starting at 6: 693; EG-NEXT: VTX_READ_128 T10.XYZW, T0.X, 0, #1 694; EG-NEXT: ALU clause starting at 8: 695; EG-NEXT: LSHL * T0.W, T0.X, literal.x, 696; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) 697; EG-NEXT: ADD_INT * T0.X, KC0[2].Z, PV.W, 698; EG-NEXT: ALU clause starting at 11: 699; EG-NEXT: MOV T4.X, T10.X, 700; EG-NEXT: MOV * T5.X, T10.Y, 701; EG-NEXT: MOV T0.X, PV.X, 702; EG-NEXT: MOV T0.Y, PS, 703; EG-NEXT: MOV * T2.X, T10.Z, 704; EG-NEXT: MOV T3.X, T10.W, 705; EG-NEXT: MOV * T0.Z, T6.X, 706; EG-NEXT: MOV * T1.Y, T2.X, 707; EG-NEXT: AND_INT T1.W, PV.Y, literal.x, 708; EG-NEXT: AND_INT * T2.W, T0.X, literal.x, 709; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 710; EG-NEXT: LSHL * T1.W, PS, PV.W, 711; EG-NEXT: AND_INT T1.W, PV.W, literal.x, 712; EG-NEXT: AND_INT * T2.W, T0.Z, literal.y, 713; EG-NEXT: 65535(9.183409e-41), -65536(nan) 714; EG-NEXT: OR_INT * T1.W, PS, PV.W, 715; EG-NEXT: MOV * T0.Z, T3.X, 716; EG-NEXT: MOV * T6.X, T1.W, 717; EG-NEXT: MOV T1.Z, PV.X, 718; EG-NEXT: LSHR T1.W, T1.Y, literal.x, 719; EG-NEXT: LSHR * T2.W, T0.X, literal.x, 720; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 721; EG-NEXT: LSHL T1.W, PS, PV.W, 722; EG-NEXT: AND_INT * T2.W, PV.Z, literal.x, 723; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 724; EG-NEXT: LSHL * T1.W, PV.W, literal.x, 725; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 726; EG-NEXT: OR_INT * T1.W, T2.W, PV.W, 727; EG-NEXT: MOV T6.X, PV.W, 728; EG-NEXT: MOV * T0.X, T7.X, 729; EG-NEXT: AND_INT T1.W, T0.Z, literal.x, 730; EG-NEXT: AND_INT * T2.W, T0.Y, literal.x, 731; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 732; EG-NEXT: LSHL T1.W, PS, PV.W, 733; EG-NEXT: AND_INT * T2.W, T0.X, literal.x, 734; EG-NEXT: -65536(nan), 0(0.000000e+00) 735; EG-NEXT: AND_INT * T1.W, PV.W, literal.x, 736; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00) 737; EG-NEXT: OR_INT * T1.W, T2.W, PV.W, 738; EG-NEXT: MOV * T7.X, PV.W, 739; EG-NEXT: MOV T0.X, PV.X, 740; EG-NEXT: LSHR T1.W, T0.Z, literal.x, 741; EG-NEXT: LSHR * T2.W, T0.Y, literal.x, 742; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 743; EG-NEXT: LSHL * T1.W, PS, PV.W, 744; EG-NEXT: AND_INT T0.Z, T0.X, literal.x, 745; EG-NEXT: LSHL T1.W, PV.W, literal.y, 746; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, T0.W, 747; EG-NEXT: 65535(9.183409e-41), 16(2.242078e-44) 748; EG-NEXT: LSHR T0.X, PS, literal.x, 749; EG-NEXT: OR_INT * T10.Y, PV.Z, PV.W, 750; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 751; EG-NEXT: MOV T7.X, PV.Y, 752; EG-NEXT: MOV * T10.X, T6.X, 753 %tid = call i32 @llvm.amdgcn.workitem.id.x() #0 754 %gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %in, i32 %tid 755 %gep.out = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %out, i32 %tid 756 %b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %gep, i16 1 757 %a = load <4 x i16>, <4 x i16> addrspace(1)* %gep 758 %b = load <4 x i16>, <4 x i16> addrspace(1)* %b_ptr 759 %result = shl <4 x i16> %a, %b 760 store <4 x i16> %result, <4 x i16> addrspace(1)* %gep.out 761 ret void 762} 763 764define amdgpu_kernel void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { 765; SI-LABEL: shl_i64: 766; SI: ; %bb.0: 767; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 768; SI-NEXT: s_mov_b32 s7, 0xf000 769; SI-NEXT: s_mov_b32 s6, -1 770; SI-NEXT: s_mov_b32 s10, s6 771; SI-NEXT: s_mov_b32 s11, s7 772; SI-NEXT: s_waitcnt lgkmcnt(0) 773; SI-NEXT: s_mov_b32 s8, s2 774; SI-NEXT: s_mov_b32 s9, s3 775; SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 776; SI-NEXT: s_mov_b32 s4, s0 777; SI-NEXT: s_mov_b32 s5, s1 778; SI-NEXT: s_waitcnt vmcnt(0) 779; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], v2 780; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 781; SI-NEXT: s_endpgm 782; 783; VI-LABEL: shl_i64: 784; VI: ; %bb.0: 785; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 786; VI-NEXT: s_waitcnt lgkmcnt(0) 787; VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 788; VI-NEXT: s_mov_b32 s3, 0xf000 789; VI-NEXT: s_mov_b32 s2, -1 790; VI-NEXT: s_waitcnt lgkmcnt(0) 791; VI-NEXT: s_lshl_b64 s[4:5], s[4:5], s6 792; VI-NEXT: v_mov_b32_e32 v0, s4 793; VI-NEXT: v_mov_b32_e32 v1, s5 794; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 795; VI-NEXT: s_endpgm 796; 797; EG-LABEL: shl_i64: 798; EG: ; %bb.0: 799; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 800; EG-NEXT: TEX 0 @6 801; EG-NEXT: ALU 12, @9, KC0[CB0:0-32], KC1[] 802; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 803; EG-NEXT: CF_END 804; EG-NEXT: PAD 805; EG-NEXT: Fetch clause starting at 6: 806; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1 807; EG-NEXT: ALU clause starting at 8: 808; EG-NEXT: MOV * T0.X, KC0[2].Z, 809; EG-NEXT: ALU clause starting at 9: 810; EG-NEXT: AND_INT T1.Y, T0.Z, literal.x, 811; EG-NEXT: LSHR T1.Z, T0.Y, 1, 812; EG-NEXT: BIT_ALIGN_INT T0.W, T0.Y, T0.X, 1, 813; EG-NEXT: NOT_INT * T1.W, T0.Z, 814; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 815; EG-NEXT: BIT_ALIGN_INT T1.Z, PV.Z, PV.W, PS, 816; EG-NEXT: LSHL T0.W, T0.X, PV.Y, 817; EG-NEXT: AND_INT * T1.W, T0.Z, literal.x, 818; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) 819; EG-NEXT: CNDE_INT * T0.Y, PS, PV.Z, PV.W, 820; EG-NEXT: CNDE_INT T0.X, T1.W, T0.W, 0.0, 821; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 822; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 823 %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1 824 %a = load i64, i64 addrspace(1)* %in 825 %b = load i64, i64 addrspace(1)* %b_ptr 826 %result = shl i64 %a, %b 827 store i64 %result, i64 addrspace(1)* %out 828 ret void 829} 830 831define amdgpu_kernel void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) { 832; SI-LABEL: shl_v2i64: 833; SI: ; %bb.0: 834; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 835; SI-NEXT: s_mov_b32 s7, 0xf000 836; SI-NEXT: s_mov_b32 s6, -1 837; SI-NEXT: s_mov_b32 s10, s6 838; SI-NEXT: s_mov_b32 s11, s7 839; SI-NEXT: s_waitcnt lgkmcnt(0) 840; SI-NEXT: s_mov_b32 s8, s2 841; SI-NEXT: s_mov_b32 s9, s3 842; SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 843; SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:16 844; SI-NEXT: s_mov_b32 s4, s0 845; SI-NEXT: s_mov_b32 s5, s1 846; SI-NEXT: s_waitcnt vmcnt(0) 847; SI-NEXT: v_lshl_b64 v[2:3], v[2:3], v6 848; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], v4 849; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 850; SI-NEXT: s_endpgm 851; 852; VI-LABEL: shl_v2i64: 853; VI: ; %bb.0: 854; VI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x24 855; VI-NEXT: s_waitcnt lgkmcnt(0) 856; VI-NEXT: s_load_dwordx8 s[0:7], s[10:11], 0x0 857; VI-NEXT: s_mov_b32 s11, 0xf000 858; VI-NEXT: s_mov_b32 s10, -1 859; VI-NEXT: s_waitcnt lgkmcnt(0) 860; VI-NEXT: s_lshl_b64 s[2:3], s[2:3], s6 861; VI-NEXT: s_lshl_b64 s[0:1], s[0:1], s4 862; VI-NEXT: v_mov_b32_e32 v0, s0 863; VI-NEXT: v_mov_b32_e32 v1, s1 864; VI-NEXT: v_mov_b32_e32 v2, s2 865; VI-NEXT: v_mov_b32_e32 v3, s3 866; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0 867; VI-NEXT: s_endpgm 868; 869; EG-LABEL: shl_v2i64: 870; EG: ; %bb.0: 871; EG-NEXT: ALU 0, @10, KC0[CB0:0-32], KC1[] 872; EG-NEXT: TEX 1 @6 873; EG-NEXT: ALU 22, @11, KC0[CB0:0-32], KC1[] 874; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T3.XYZW, T0.X, 1 875; EG-NEXT: CF_END 876; EG-NEXT: PAD 877; EG-NEXT: Fetch clause starting at 6: 878; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 16, #1 879; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1 880; EG-NEXT: ALU clause starting at 10: 881; EG-NEXT: MOV * T0.X, KC0[2].Z, 882; EG-NEXT: ALU clause starting at 11: 883; EG-NEXT: AND_INT T1.Y, T1.Z, literal.x, 884; EG-NEXT: LSHR T2.Z, T0.W, 1, 885; EG-NEXT: BIT_ALIGN_INT T0.W, T0.W, T0.Z, 1, 886; EG-NEXT: NOT_INT * T1.W, T1.Z, 887; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 888; EG-NEXT: BIT_ALIGN_INT T0.W, PV.Z, PV.W, PS, 889; EG-NEXT: LSHL * T1.W, T0.Z, PV.Y, 890; EG-NEXT: AND_INT T2.X, T1.Z, literal.x, 891; EG-NEXT: AND_INT T1.Y, T1.X, literal.y, 892; EG-NEXT: LSHR T0.Z, T0.Y, 1, 893; EG-NEXT: BIT_ALIGN_INT T2.W, T0.Y, T0.X, 1, 894; EG-NEXT: NOT_INT * T3.W, T1.X, 895; EG-NEXT: 32(4.484155e-44), 31(4.344025e-44) 896; EG-NEXT: BIT_ALIGN_INT T0.Y, PV.Z, PV.W, PS, 897; EG-NEXT: LSHL T0.Z, T0.X, PV.Y, 898; EG-NEXT: AND_INT T2.W, T1.X, literal.x, BS:VEC_120/SCL_212 899; EG-NEXT: CNDE_INT * T3.W, PV.X, T0.W, T1.W, 900; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) 901; EG-NEXT: CNDE_INT T3.Y, PV.W, PV.Y, PV.Z, 902; EG-NEXT: CNDE_INT * T3.Z, T2.X, T1.W, 0.0, 903; EG-NEXT: CNDE_INT T3.X, T2.W, T0.Z, 0.0, 904; EG-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, 905; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 906 %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1 907 %a = load <2 x i64>, <2 x i64> addrspace(1)* %in 908 %b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr 909 %result = shl <2 x i64> %a, %b 910 store <2 x i64> %result, <2 x i64> addrspace(1)* %out 911 ret void 912} 913 914define amdgpu_kernel void @shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) { 915; SI-LABEL: shl_v4i64: 916; SI: ; %bb.0: 917; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 918; SI-NEXT: s_mov_b32 s3, 0xf000 919; SI-NEXT: s_mov_b32 s2, -1 920; SI-NEXT: s_mov_b32 s10, s2 921; SI-NEXT: s_mov_b32 s11, s3 922; SI-NEXT: s_waitcnt lgkmcnt(0) 923; SI-NEXT: s_mov_b32 s8, s6 924; SI-NEXT: s_mov_b32 s9, s7 925; SI-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 offset:16 926; SI-NEXT: buffer_load_dwordx4 v[4:7], off, s[8:11], 0 offset:48 927; SI-NEXT: buffer_load_dwordx4 v[7:10], off, s[8:11], 0 928; SI-NEXT: buffer_load_dwordx4 v[11:14], off, s[8:11], 0 offset:32 929; SI-NEXT: s_mov_b32 s0, s4 930; SI-NEXT: s_mov_b32 s1, s5 931; SI-NEXT: s_waitcnt vmcnt(2) 932; SI-NEXT: v_lshl_b64 v[2:3], v[2:3], v6 933; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], v4 934; SI-NEXT: s_waitcnt vmcnt(0) 935; SI-NEXT: v_lshl_b64 v[9:10], v[9:10], v13 936; SI-NEXT: v_lshl_b64 v[7:8], v[7:8], v11 937; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:16 938; SI-NEXT: buffer_store_dwordx4 v[7:10], off, s[0:3], 0 939; SI-NEXT: s_endpgm 940; 941; VI-LABEL: shl_v4i64: 942; VI: ; %bb.0: 943; VI-NEXT: s_load_dwordx4 s[16:19], s[0:1], 0x24 944; VI-NEXT: s_waitcnt lgkmcnt(0) 945; VI-NEXT: s_load_dwordx16 s[0:15], s[18:19], 0x0 946; VI-NEXT: s_mov_b32 s19, 0xf000 947; VI-NEXT: s_mov_b32 s18, -1 948; VI-NEXT: s_waitcnt lgkmcnt(0) 949; VI-NEXT: s_lshl_b64 s[6:7], s[6:7], s14 950; VI-NEXT: s_lshl_b64 s[4:5], s[4:5], s12 951; VI-NEXT: s_lshl_b64 s[2:3], s[2:3], s10 952; VI-NEXT: s_lshl_b64 s[0:1], s[0:1], s8 953; VI-NEXT: v_mov_b32_e32 v0, s4 954; VI-NEXT: v_mov_b32_e32 v1, s5 955; VI-NEXT: v_mov_b32_e32 v2, s6 956; VI-NEXT: v_mov_b32_e32 v3, s7 957; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 offset:16 958; VI-NEXT: s_nop 0 959; VI-NEXT: v_mov_b32_e32 v0, s0 960; VI-NEXT: v_mov_b32_e32 v1, s1 961; VI-NEXT: v_mov_b32_e32 v2, s2 962; VI-NEXT: v_mov_b32_e32 v3, s3 963; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 964; VI-NEXT: s_endpgm 965; 966; EG-LABEL: shl_v4i64: 967; EG: ; %bb.0: 968; EG-NEXT: ALU 0, @14, KC0[CB0:0-32], KC1[] 969; EG-NEXT: TEX 3 @6 970; EG-NEXT: ALU 47, @15, KC0[CB0:0-32], KC1[] 971; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T2.X, 0 972; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T0.X, 1 973; EG-NEXT: CF_END 974; EG-NEXT: Fetch clause starting at 6: 975; EG-NEXT: VTX_READ_128 T1.XYZW, T0.X, 48, #1 976; EG-NEXT: VTX_READ_128 T2.XYZW, T0.X, 0, #1 977; EG-NEXT: VTX_READ_128 T3.XYZW, T0.X, 32, #1 978; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 16, #1 979; EG-NEXT: ALU clause starting at 14: 980; EG-NEXT: MOV * T0.X, KC0[2].Z, 981; EG-NEXT: ALU clause starting at 15: 982; EG-NEXT: AND_INT T4.Z, T1.Z, literal.x, 983; EG-NEXT: LSHR T1.W, T0.W, 1, 984; EG-NEXT: NOT_INT * T3.W, T1.Z, 985; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 986; EG-NEXT: BIT_ALIGN_INT T4.X, T0.W, T0.Z, 1, 987; EG-NEXT: AND_INT T1.Y, T3.Z, literal.x, BS:VEC_201 988; EG-NEXT: LSHR T5.Z, T2.W, 1, BS:VEC_120/SCL_212 989; EG-NEXT: BIT_ALIGN_INT T0.W, T2.W, T2.Z, 1, BS:VEC_102/SCL_221 990; EG-NEXT: NOT_INT * T2.W, T3.Z, 991; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 992; EG-NEXT: BIT_ALIGN_INT T3.Y, PV.Z, PV.W, PS, 993; EG-NEXT: LSHL T2.Z, T2.Z, PV.Y, 994; EG-NEXT: BIT_ALIGN_INT T0.W, T1.W, PV.X, T3.W, 995; EG-NEXT: LSHL * T1.W, T0.Z, T4.Z, 996; EG-NEXT: AND_INT T4.X, T1.Z, literal.x, 997; EG-NEXT: AND_INT T1.Y, T1.X, literal.y, 998; EG-NEXT: LSHR T0.Z, T0.Y, 1, 999; EG-NEXT: BIT_ALIGN_INT T2.W, T0.Y, T0.X, 1, 1000; EG-NEXT: NOT_INT * T3.W, T1.X, 1001; EG-NEXT: 32(4.484155e-44), 31(4.344025e-44) 1002; EG-NEXT: AND_INT T5.X, T3.Z, literal.x, 1003; EG-NEXT: BIT_ALIGN_INT T0.Y, PV.Z, PV.W, PS, 1004; EG-NEXT: LSHL T0.Z, T0.X, PV.Y, 1005; EG-NEXT: AND_INT T2.W, T1.X, literal.x, BS:VEC_120/SCL_212 1006; EG-NEXT: CNDE_INT * T4.W, PV.X, T0.W, T1.W, 1007; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) 1008; EG-NEXT: AND_INT T0.X, T3.X, literal.x, 1009; EG-NEXT: CNDE_INT T4.Y, PV.W, PV.Y, PV.Z, 1010; EG-NEXT: LSHR T1.Z, T2.Y, 1, 1011; EG-NEXT: BIT_ALIGN_INT T0.W, T2.Y, T2.X, 1, 1012; EG-NEXT: NOT_INT * T3.W, T3.X, 1013; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 1014; EG-NEXT: BIT_ALIGN_INT T1.X, PV.Z, PV.W, PS, 1015; EG-NEXT: LSHL T0.Y, T2.X, PV.X, 1016; EG-NEXT: CNDE_INT T4.Z, T4.X, T1.W, 0.0, BS:VEC_120/SCL_212 1017; EG-NEXT: AND_INT * T0.W, T3.X, literal.x, BS:VEC_201 1018; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) 1019; EG-NEXT: CNDE_INT * T1.W, T5.X, T3.Y, T2.Z, 1020; EG-NEXT: CNDE_INT T4.X, T2.W, T0.Z, 0.0, 1021; EG-NEXT: CNDE_INT T1.Y, T0.W, T1.X, T0.Y, BS:VEC_120/SCL_212 1022; EG-NEXT: ADD_INT * T2.W, KC0[2].Y, literal.x, 1023; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00) 1024; EG-NEXT: LSHR T0.X, PV.W, literal.x, 1025; EG-NEXT: CNDE_INT T1.Z, T5.X, T2.Z, 0.0, 1026; EG-NEXT: CNDE_INT * T1.X, T0.W, T0.Y, 0.0, 1027; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1028; EG-NEXT: LSHR * T2.X, KC0[2].Y, literal.x, 1029; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1030 %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1 1031 %a = load <4 x i64>, <4 x i64> addrspace(1)* %in 1032 %b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr 1033 %result = shl <4 x i64> %a, %b 1034 store <4 x i64> %result, <4 x i64> addrspace(1)* %out 1035 ret void 1036} 1037 1038; Make sure load width gets reduced to i32 load. 1039define amdgpu_kernel void @s_shl_32_i64(i64 addrspace(1)* %out, [8 x i32], i64 %a) { 1040; SI-LABEL: s_shl_32_i64: 1041; SI: ; %bb.0: 1042; SI-NEXT: s_load_dword s4, s[0:1], 0x13 1043; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1044; SI-NEXT: s_mov_b32 s3, 0xf000 1045; SI-NEXT: s_mov_b32 s2, -1 1046; SI-NEXT: v_mov_b32_e32 v0, 0 1047; SI-NEXT: s_waitcnt lgkmcnt(0) 1048; SI-NEXT: v_mov_b32_e32 v1, s4 1049; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1050; SI-NEXT: s_endpgm 1051; 1052; VI-LABEL: s_shl_32_i64: 1053; VI: ; %bb.0: 1054; VI-NEXT: s_load_dword s4, s[0:1], 0x4c 1055; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1056; VI-NEXT: s_mov_b32 s3, 0xf000 1057; VI-NEXT: s_mov_b32 s2, -1 1058; VI-NEXT: v_mov_b32_e32 v0, 0 1059; VI-NEXT: s_waitcnt lgkmcnt(0) 1060; VI-NEXT: v_mov_b32_e32 v1, s4 1061; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1062; VI-NEXT: s_endpgm 1063; 1064; EG-LABEL: s_shl_32_i64: 1065; EG: ; %bb.0: 1066; EG-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[] 1067; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1068; EG-NEXT: CF_END 1069; EG-NEXT: PAD 1070; EG-NEXT: ALU clause starting at 4: 1071; EG-NEXT: MOV * T0.Y, KC0[4].W, 1072; EG-NEXT: MOV T0.X, 0.0, 1073; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1074; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1075 %result = shl i64 %a, 32 1076 store i64 %result, i64 addrspace(1)* %out 1077 ret void 1078} 1079 1080define amdgpu_kernel void @v_shl_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { 1081; SI-LABEL: v_shl_32_i64: 1082; SI: ; %bb.0: 1083; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9 1084; SI-NEXT: s_ashr_i32 s3, s2, 31 1085; SI-NEXT: s_lshl_b64 s[0:1], s[2:3], 3 1086; SI-NEXT: v_mov_b32_e32 v0, s0 1087; SI-NEXT: s_mov_b32 s11, 0xf000 1088; SI-NEXT: s_mov_b32 s10, 0 1089; SI-NEXT: s_waitcnt lgkmcnt(0) 1090; SI-NEXT: s_mov_b64 s[8:9], s[6:7] 1091; SI-NEXT: v_mov_b32_e32 v1, s1 1092; SI-NEXT: buffer_load_dword v3, v[0:1], s[8:11], 0 addr64 1093; SI-NEXT: s_mov_b64 s[6:7], s[10:11] 1094; SI-NEXT: v_mov_b32_e32 v2, 0 1095; SI-NEXT: s_waitcnt vmcnt(0) 1096; SI-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[4:7], 0 addr64 1097; SI-NEXT: s_endpgm 1098; 1099; VI-LABEL: v_shl_32_i64: 1100; VI: ; %bb.0: 1101; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 1102; VI-NEXT: s_ashr_i32 s3, s2, 31 1103; VI-NEXT: s_lshl_b64 s[0:1], s[2:3], 3 1104; VI-NEXT: v_mov_b32_e32 v0, 0 1105; VI-NEXT: s_waitcnt lgkmcnt(0) 1106; VI-NEXT: s_add_u32 s2, s6, s0 1107; VI-NEXT: s_addc_u32 s3, s7, s1 1108; VI-NEXT: s_load_dword s2, s[2:3], 0x0 1109; VI-NEXT: s_add_u32 s0, s4, s0 1110; VI-NEXT: s_addc_u32 s1, s5, s1 1111; VI-NEXT: v_mov_b32_e32 v3, s1 1112; VI-NEXT: v_mov_b32_e32 v2, s0 1113; VI-NEXT: s_waitcnt lgkmcnt(0) 1114; VI-NEXT: v_mov_b32_e32 v1, s2 1115; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 1116; VI-NEXT: s_endpgm 1117; 1118; EG-LABEL: v_shl_32_i64: 1119; EG: ; %bb.0: 1120; EG-NEXT: ALU 2, @8, KC0[CB0:0-32], KC1[] 1121; EG-NEXT: TEX 0 @6 1122; EG-NEXT: ALU 4, @11, KC0[CB0:0-32], KC1[] 1123; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XY, T2.X, 1 1124; EG-NEXT: CF_END 1125; EG-NEXT: PAD 1126; EG-NEXT: Fetch clause starting at 6: 1127; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1 1128; EG-NEXT: ALU clause starting at 8: 1129; EG-NEXT: LSHL * T0.W, T1.X, literal.x, 1130; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00) 1131; EG-NEXT: ADD_INT * T0.X, KC0[2].Z, PV.W, 1132; EG-NEXT: ALU clause starting at 11: 1133; EG-NEXT: MOV T1.X, 0.0, 1134; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, T0.W, 1135; EG-NEXT: LSHR T2.X, PV.W, literal.x, 1136; EG-NEXT: MOV * T1.Y, T0.X, 1137; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1138 %tid = call i32 @llvm.amdgcn.workgroup.id.x() #0 1139 %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid 1140 %gep.out = getelementptr i64, i64 addrspace(1)* %out, i32 %tid 1141 %a = load i64, i64 addrspace(1)* %gep.in 1142 %result = shl i64 %a, 32 1143 store i64 %result, i64 addrspace(1)* %gep.out 1144 ret void 1145} 1146 1147define amdgpu_kernel void @s_shl_constant_i64(i64 addrspace(1)* %out, i64 %a) { 1148; SI-LABEL: s_shl_constant_i64: 1149; SI: ; %bb.0: 1150; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1151; SI-NEXT: s_mov_b32 s6, -1 1152; SI-NEXT: s_mov_b32 s9, 0xffff 1153; SI-NEXT: s_mov_b32 s8, s6 1154; SI-NEXT: s_mov_b32 s7, 0xf000 1155; SI-NEXT: s_waitcnt lgkmcnt(0) 1156; SI-NEXT: s_mov_b32 s4, s0 1157; SI-NEXT: s_mov_b32 s5, s1 1158; SI-NEXT: s_lshl_b64 s[0:1], s[8:9], s2 1159; SI-NEXT: v_mov_b32_e32 v0, s0 1160; SI-NEXT: v_mov_b32_e32 v1, s1 1161; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1162; SI-NEXT: s_endpgm 1163; 1164; VI-LABEL: s_shl_constant_i64: 1165; VI: ; %bb.0: 1166; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 1167; VI-NEXT: s_mov_b32 s6, -1 1168; VI-NEXT: s_mov_b32 s9, 0xffff 1169; VI-NEXT: s_mov_b32 s8, s6 1170; VI-NEXT: s_mov_b32 s7, 0xf000 1171; VI-NEXT: s_waitcnt lgkmcnt(0) 1172; VI-NEXT: s_mov_b32 s4, s0 1173; VI-NEXT: s_mov_b32 s5, s1 1174; VI-NEXT: s_lshl_b64 s[0:1], s[8:9], s2 1175; VI-NEXT: v_mov_b32_e32 v0, s0 1176; VI-NEXT: v_mov_b32_e32 v1, s1 1177; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1178; VI-NEXT: s_endpgm 1179; 1180; EG-LABEL: s_shl_constant_i64: 1181; EG: ; %bb.0: 1182; EG-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[] 1183; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1184; EG-NEXT: CF_END 1185; EG-NEXT: PAD 1186; EG-NEXT: ALU clause starting at 4: 1187; EG-NEXT: AND_INT T0.Z, KC0[2].W, literal.x, 1188; EG-NEXT: MOV T0.W, literal.y, 1189; EG-NEXT: NOT_INT * T1.W, KC0[2].W, 1190; EG-NEXT: 31(4.344025e-44), -1(nan) 1191; EG-NEXT: BIT_ALIGN_INT T1.Z, literal.x, PV.W, PS, 1192; EG-NEXT: LSHL T0.W, literal.y, PV.Z, 1193; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.z, 1194; EG-NEXT: 32767(4.591635e-41), -1(nan) 1195; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) 1196; EG-NEXT: CNDE_INT * T0.Y, PS, PV.Z, PV.W, 1197; EG-NEXT: CNDE_INT T0.X, T1.W, T0.W, 0.0, 1198; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1199; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1200 %shl = shl i64 281474976710655, %a 1201 store i64 %shl, i64 addrspace(1)* %out, align 8 1202 ret void 1203} 1204 1205define amdgpu_kernel void @v_shl_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) { 1206; SI-LABEL: v_shl_constant_i64: 1207; SI: ; %bb.0: 1208; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1209; SI-NEXT: s_mov_b32 s7, 0xf000 1210; SI-NEXT: s_mov_b32 s6, -1 1211; SI-NEXT: s_mov_b32 s10, s6 1212; SI-NEXT: s_mov_b32 s11, s7 1213; SI-NEXT: s_waitcnt lgkmcnt(0) 1214; SI-NEXT: s_mov_b32 s8, s2 1215; SI-NEXT: s_mov_b32 s9, s3 1216; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0 1217; SI-NEXT: s_mov_b32 s2, 0xab19b207 1218; SI-NEXT: s_movk_i32 s3, 0x11e 1219; SI-NEXT: s_mov_b32 s4, s0 1220; SI-NEXT: s_mov_b32 s5, s1 1221; SI-NEXT: s_waitcnt vmcnt(0) 1222; SI-NEXT: v_lshl_b64 v[0:1], s[2:3], v0 1223; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1224; SI-NEXT: s_endpgm 1225; 1226; VI-LABEL: v_shl_constant_i64: 1227; VI: ; %bb.0: 1228; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 1229; VI-NEXT: s_mov_b32 s7, 0xf000 1230; VI-NEXT: s_mov_b32 s6, -1 1231; VI-NEXT: s_waitcnt lgkmcnt(0) 1232; VI-NEXT: s_load_dword s2, s[2:3], 0x0 1233; VI-NEXT: s_mov_b32 s4, s0 1234; VI-NEXT: s_mov_b32 s5, s1 1235; VI-NEXT: s_mov_b32 s0, 0xab19b207 1236; VI-NEXT: s_movk_i32 s1, 0x11e 1237; VI-NEXT: s_waitcnt lgkmcnt(0) 1238; VI-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 1239; VI-NEXT: v_mov_b32_e32 v0, s0 1240; VI-NEXT: v_mov_b32_e32 v1, s1 1241; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1242; VI-NEXT: s_endpgm 1243; 1244; EG-LABEL: v_shl_constant_i64: 1245; EG: ; %bb.0: 1246; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1247; EG-NEXT: TEX 0 @6 1248; EG-NEXT: ALU 12, @9, KC0[CB0:0-32], KC1[] 1249; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1250; EG-NEXT: CF_END 1251; EG-NEXT: PAD 1252; EG-NEXT: Fetch clause starting at 6: 1253; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1 1254; EG-NEXT: ALU clause starting at 8: 1255; EG-NEXT: MOV * T0.X, KC0[2].Z, 1256; EG-NEXT: ALU clause starting at 9: 1257; EG-NEXT: NOT_INT T0.Z, T0.X, 1258; EG-NEXT: MOV T0.W, literal.x, 1259; EG-NEXT: AND_INT * T1.W, T0.X, literal.y, 1260; EG-NEXT: 1435293955(1.935796e+13), 31(4.344025e-44) 1261; EG-NEXT: LSHL T1.Z, literal.x, PS, 1262; EG-NEXT: BIT_ALIGN_INT T0.W, literal.y, PV.W, PV.Z, 1263; EG-NEXT: AND_INT * T1.W, T0.X, literal.z, 1264; EG-NEXT: -1424379385(-5.460358e-13), 143(2.003857e-43) 1265; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) 1266; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, PV.Z, 1267; EG-NEXT: CNDE_INT T0.X, T1.W, T1.Z, 0.0, 1268; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1269; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1270 %a = load i64, i64 addrspace(1)* %aptr, align 8 1271 %shl = shl i64 1231231234567, %a 1272 store i64 %shl, i64 addrspace(1)* %out, align 8 1273 ret void 1274} 1275 1276define amdgpu_kernel void @v_shl_i64_32_bit_constant(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) { 1277; SI-LABEL: v_shl_i64_32_bit_constant: 1278; SI: ; %bb.0: 1279; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1280; SI-NEXT: s_mov_b32 s7, 0xf000 1281; SI-NEXT: s_mov_b32 s6, -1 1282; SI-NEXT: s_mov_b32 s10, s6 1283; SI-NEXT: s_mov_b32 s11, s7 1284; SI-NEXT: s_waitcnt lgkmcnt(0) 1285; SI-NEXT: s_mov_b32 s8, s2 1286; SI-NEXT: s_mov_b32 s9, s3 1287; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0 1288; SI-NEXT: s_mov_b64 s[2:3], 0x12d687 1289; SI-NEXT: s_mov_b32 s4, s0 1290; SI-NEXT: s_mov_b32 s5, s1 1291; SI-NEXT: s_waitcnt vmcnt(0) 1292; SI-NEXT: v_lshl_b64 v[0:1], s[2:3], v0 1293; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1294; SI-NEXT: s_endpgm 1295; 1296; VI-LABEL: v_shl_i64_32_bit_constant: 1297; VI: ; %bb.0: 1298; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 1299; VI-NEXT: s_mov_b32 s7, 0xf000 1300; VI-NEXT: s_mov_b32 s6, -1 1301; VI-NEXT: s_waitcnt lgkmcnt(0) 1302; VI-NEXT: s_load_dword s2, s[2:3], 0x0 1303; VI-NEXT: s_mov_b32 s4, s0 1304; VI-NEXT: s_mov_b32 s5, s1 1305; VI-NEXT: s_mov_b64 s[0:1], 0x12d687 1306; VI-NEXT: s_waitcnt lgkmcnt(0) 1307; VI-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 1308; VI-NEXT: v_mov_b32_e32 v0, s0 1309; VI-NEXT: v_mov_b32_e32 v1, s1 1310; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1311; VI-NEXT: s_endpgm 1312; 1313; EG-LABEL: v_shl_i64_32_bit_constant: 1314; EG: ; %bb.0: 1315; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1316; EG-NEXT: TEX 0 @6 1317; EG-NEXT: ALU 11, @9, KC0[CB0:0-32], KC1[] 1318; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1319; EG-NEXT: CF_END 1320; EG-NEXT: PAD 1321; EG-NEXT: Fetch clause starting at 6: 1322; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1 1323; EG-NEXT: ALU clause starting at 8: 1324; EG-NEXT: MOV * T0.X, KC0[2].Z, 1325; EG-NEXT: ALU clause starting at 9: 1326; EG-NEXT: AND_INT T0.W, T0.X, literal.x, 1327; EG-NEXT: NOT_INT * T1.W, T0.X, 1328; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 1329; EG-NEXT: BIT_ALIGN_INT T0.Z, 0.0, literal.x, PS, 1330; EG-NEXT: LSHL T0.W, literal.y, PV.W, 1331; EG-NEXT: AND_INT * T1.W, T0.X, literal.z, 1332; EG-NEXT: 617283(8.649977e-40), 1234567(1.729997e-39) 1333; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) 1334; EG-NEXT: CNDE_INT * T0.Y, PS, PV.Z, PV.W, 1335; EG-NEXT: CNDE_INT T0.X, T1.W, T0.W, 0.0, 1336; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1337; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1338 %a = load i64, i64 addrspace(1)* %aptr, align 8 1339 %shl = shl i64 1234567, %a 1340 store i64 %shl, i64 addrspace(1)* %out, align 8 1341 ret void 1342} 1343 1344define amdgpu_kernel void @v_shl_inline_imm_64_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) { 1345; SI-LABEL: v_shl_inline_imm_64_i64: 1346; SI: ; %bb.0: 1347; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9 1348; SI-NEXT: s_mov_b32 s7, 0xf000 1349; SI-NEXT: s_mov_b32 s6, -1 1350; SI-NEXT: s_mov_b32 s10, s6 1351; SI-NEXT: s_mov_b32 s11, s7 1352; SI-NEXT: s_waitcnt lgkmcnt(0) 1353; SI-NEXT: s_mov_b32 s8, s2 1354; SI-NEXT: s_mov_b32 s9, s3 1355; SI-NEXT: buffer_load_dword v0, off, s[8:11], 0 1356; SI-NEXT: s_mov_b32 s4, s0 1357; SI-NEXT: s_mov_b32 s5, s1 1358; SI-NEXT: s_waitcnt vmcnt(0) 1359; SI-NEXT: v_lshl_b64 v[0:1], 64, v0 1360; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1361; SI-NEXT: s_endpgm 1362; 1363; VI-LABEL: v_shl_inline_imm_64_i64: 1364; VI: ; %bb.0: 1365; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 1366; VI-NEXT: s_waitcnt lgkmcnt(0) 1367; VI-NEXT: s_load_dword s4, s[2:3], 0x0 1368; VI-NEXT: s_mov_b32 s3, 0xf000 1369; VI-NEXT: s_mov_b32 s2, -1 1370; VI-NEXT: s_waitcnt lgkmcnt(0) 1371; VI-NEXT: s_lshl_b64 s[4:5], 64, s4 1372; VI-NEXT: v_mov_b32_e32 v0, s4 1373; VI-NEXT: v_mov_b32_e32 v1, s5 1374; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1375; VI-NEXT: s_endpgm 1376; 1377; EG-LABEL: v_shl_inline_imm_64_i64: 1378; EG: ; %bb.0: 1379; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[] 1380; EG-NEXT: TEX 0 @6 1381; EG-NEXT: ALU 10, @9, KC0[CB0:0-32], KC1[] 1382; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1383; EG-NEXT: CF_END 1384; EG-NEXT: PAD 1385; EG-NEXT: Fetch clause starting at 6: 1386; EG-NEXT: VTX_READ_32 T0.X, T0.X, 0, #1 1387; EG-NEXT: ALU clause starting at 8: 1388; EG-NEXT: MOV * T0.X, KC0[2].Z, 1389; EG-NEXT: ALU clause starting at 9: 1390; EG-NEXT: AND_INT T0.W, T0.X, literal.x, 1391; EG-NEXT: NOT_INT * T1.W, T0.X, 1392; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 1393; EG-NEXT: BIT_ALIGN_INT T0.Z, 0.0, literal.x, PS, 1394; EG-NEXT: LSHL T0.W, literal.y, PV.W, 1395; EG-NEXT: AND_INT * T1.W, T0.X, literal.x, 1396; EG-NEXT: 32(4.484155e-44), 64(8.968310e-44) 1397; EG-NEXT: CNDE_INT * T0.Y, PS, PV.Z, PV.W, 1398; EG-NEXT: CNDE_INT T0.X, T1.W, T0.W, 0.0, 1399; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1400; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1401 %a = load i64, i64 addrspace(1)* %aptr, align 8 1402 %shl = shl i64 64, %a 1403 store i64 %shl, i64 addrspace(1)* %out, align 8 1404 ret void 1405} 1406 1407define amdgpu_kernel void @s_shl_inline_imm_64_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { 1408; SI-LABEL: s_shl_inline_imm_64_i64: 1409; SI: ; %bb.0: 1410; SI-NEXT: s_load_dword s4, s[0:1], 0xd 1411; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1412; SI-NEXT: s_mov_b32 s3, 0xf000 1413; SI-NEXT: s_mov_b32 s2, -1 1414; SI-NEXT: s_waitcnt lgkmcnt(0) 1415; SI-NEXT: s_lshl_b64 s[4:5], 64, s4 1416; SI-NEXT: v_mov_b32_e32 v0, s4 1417; SI-NEXT: v_mov_b32_e32 v1, s5 1418; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1419; SI-NEXT: s_endpgm 1420; 1421; VI-LABEL: s_shl_inline_imm_64_i64: 1422; VI: ; %bb.0: 1423; VI-NEXT: s_load_dword s4, s[0:1], 0x34 1424; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1425; VI-NEXT: s_mov_b32 s3, 0xf000 1426; VI-NEXT: s_mov_b32 s2, -1 1427; VI-NEXT: s_waitcnt lgkmcnt(0) 1428; VI-NEXT: s_lshl_b64 s[4:5], 64, s4 1429; VI-NEXT: v_mov_b32_e32 v0, s4 1430; VI-NEXT: v_mov_b32_e32 v1, s5 1431; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1432; VI-NEXT: s_endpgm 1433; 1434; EG-LABEL: s_shl_inline_imm_64_i64: 1435; EG: ; %bb.0: 1436; EG-NEXT: ALU 10, @4, KC0[CB0:0-32], KC1[] 1437; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1438; EG-NEXT: CF_END 1439; EG-NEXT: PAD 1440; EG-NEXT: ALU clause starting at 4: 1441; EG-NEXT: NOT_INT T0.W, KC0[2].W, 1442; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.x, 1443; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 1444; EG-NEXT: LSHL T0.Z, literal.x, PS, 1445; EG-NEXT: BIT_ALIGN_INT T0.W, 0.0, literal.y, PV.W, 1446; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y, 1447; EG-NEXT: 64(8.968310e-44), 32(4.484155e-44) 1448; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, PV.Z, 1449; EG-NEXT: CNDE_INT T0.X, T1.W, T0.Z, 0.0, 1450; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1451; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1452 %shl = shl i64 64, %a 1453 store i64 %shl, i64 addrspace(1)* %out, align 8 1454 ret void 1455} 1456 1457define amdgpu_kernel void @s_shl_inline_imm_1_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { 1458; SI-LABEL: s_shl_inline_imm_1_i64: 1459; SI: ; %bb.0: 1460; SI-NEXT: s_load_dword s4, s[0:1], 0xd 1461; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1462; SI-NEXT: s_mov_b32 s3, 0xf000 1463; SI-NEXT: s_mov_b32 s2, -1 1464; SI-NEXT: s_waitcnt lgkmcnt(0) 1465; SI-NEXT: s_lshl_b64 s[4:5], 1, s4 1466; SI-NEXT: v_mov_b32_e32 v0, s4 1467; SI-NEXT: v_mov_b32_e32 v1, s5 1468; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1469; SI-NEXT: s_endpgm 1470; 1471; VI-LABEL: s_shl_inline_imm_1_i64: 1472; VI: ; %bb.0: 1473; VI-NEXT: s_load_dword s4, s[0:1], 0x34 1474; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1475; VI-NEXT: s_mov_b32 s3, 0xf000 1476; VI-NEXT: s_mov_b32 s2, -1 1477; VI-NEXT: s_waitcnt lgkmcnt(0) 1478; VI-NEXT: s_lshl_b64 s[4:5], 1, s4 1479; VI-NEXT: v_mov_b32_e32 v0, s4 1480; VI-NEXT: v_mov_b32_e32 v1, s5 1481; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1482; VI-NEXT: s_endpgm 1483; 1484; EG-LABEL: s_shl_inline_imm_1_i64: 1485; EG: ; %bb.0: 1486; EG-NEXT: ALU 11, @4, KC0[CB0:0-32], KC1[] 1487; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1488; EG-NEXT: CF_END 1489; EG-NEXT: PAD 1490; EG-NEXT: ALU clause starting at 4: 1491; EG-NEXT: AND_INT T0.W, KC0[2].W, literal.x, 1492; EG-NEXT: LSHL * T1.W, KC0[2].W, literal.y, 1493; EG-NEXT: 31(4.344025e-44), 26(3.643376e-44) 1494; EG-NEXT: ASHR T1.W, PS, literal.x, 1495; EG-NEXT: LSHL * T0.W, 1, PV.W, 1496; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 1497; EG-NEXT: AND_INT T0.Y, PV.W, PS, 1498; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.x, 1499; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) 1500; EG-NEXT: CNDE_INT T0.X, PV.W, T0.W, 0.0, 1501; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1502; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1503 %shl = shl i64 1, %a 1504 store i64 %shl, i64 addrspace(1)* %out, align 8 1505 ret void 1506} 1507 1508define amdgpu_kernel void @s_shl_inline_imm_1_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { 1509; SI-LABEL: s_shl_inline_imm_1_0_i64: 1510; SI: ; %bb.0: 1511; SI-NEXT: s_load_dword s4, s[0:1], 0xd 1512; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1513; SI-NEXT: s_mov_b32 s3, 0xf000 1514; SI-NEXT: s_mov_b32 s2, -1 1515; SI-NEXT: s_waitcnt lgkmcnt(0) 1516; SI-NEXT: s_lshl_b64 s[4:5], 1.0, s4 1517; SI-NEXT: v_mov_b32_e32 v0, s4 1518; SI-NEXT: v_mov_b32_e32 v1, s5 1519; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1520; SI-NEXT: s_endpgm 1521; 1522; VI-LABEL: s_shl_inline_imm_1_0_i64: 1523; VI: ; %bb.0: 1524; VI-NEXT: s_load_dword s4, s[0:1], 0x34 1525; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1526; VI-NEXT: s_mov_b32 s3, 0xf000 1527; VI-NEXT: s_mov_b32 s2, -1 1528; VI-NEXT: s_waitcnt lgkmcnt(0) 1529; VI-NEXT: s_lshl_b64 s[4:5], 1.0, s4 1530; VI-NEXT: v_mov_b32_e32 v0, s4 1531; VI-NEXT: v_mov_b32_e32 v1, s5 1532; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1533; VI-NEXT: s_endpgm 1534; 1535; EG-LABEL: s_shl_inline_imm_1_0_i64: 1536; EG: ; %bb.0: 1537; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] 1538; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1539; EG-NEXT: CF_END 1540; EG-NEXT: PAD 1541; EG-NEXT: ALU clause starting at 4: 1542; EG-NEXT: NOT_INT * T0.W, KC0[2].W, 1543; EG-NEXT: BIT_ALIGN_INT T0.W, literal.x, 0.0, PV.W, 1544; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y, 1545; EG-NEXT: 536346624(1.050321e-19), 32(4.484155e-44) 1546; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, 0.0, 1547; EG-NEXT: MOV T0.X, 0.0, 1548; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1549; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1550 %shl = shl i64 4607182418800017408, %a 1551 store i64 %shl, i64 addrspace(1)* %out, align 8 1552 ret void 1553} 1554 1555define amdgpu_kernel void @s_shl_inline_imm_neg_1_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { 1556; SI-LABEL: s_shl_inline_imm_neg_1_0_i64: 1557; SI: ; %bb.0: 1558; SI-NEXT: s_load_dword s4, s[0:1], 0xd 1559; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1560; SI-NEXT: s_mov_b32 s3, 0xf000 1561; SI-NEXT: s_mov_b32 s2, -1 1562; SI-NEXT: s_waitcnt lgkmcnt(0) 1563; SI-NEXT: s_lshl_b64 s[4:5], -1.0, s4 1564; SI-NEXT: v_mov_b32_e32 v0, s4 1565; SI-NEXT: v_mov_b32_e32 v1, s5 1566; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1567; SI-NEXT: s_endpgm 1568; 1569; VI-LABEL: s_shl_inline_imm_neg_1_0_i64: 1570; VI: ; %bb.0: 1571; VI-NEXT: s_load_dword s4, s[0:1], 0x34 1572; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1573; VI-NEXT: s_mov_b32 s3, 0xf000 1574; VI-NEXT: s_mov_b32 s2, -1 1575; VI-NEXT: s_waitcnt lgkmcnt(0) 1576; VI-NEXT: s_lshl_b64 s[4:5], -1.0, s4 1577; VI-NEXT: v_mov_b32_e32 v0, s4 1578; VI-NEXT: v_mov_b32_e32 v1, s5 1579; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1580; VI-NEXT: s_endpgm 1581; 1582; EG-LABEL: s_shl_inline_imm_neg_1_0_i64: 1583; EG: ; %bb.0: 1584; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] 1585; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1586; EG-NEXT: CF_END 1587; EG-NEXT: PAD 1588; EG-NEXT: ALU clause starting at 4: 1589; EG-NEXT: NOT_INT * T0.W, KC0[2].W, 1590; EG-NEXT: BIT_ALIGN_INT T0.W, literal.x, 0.0, PV.W, 1591; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y, 1592; EG-NEXT: 1610088448(3.574057e+19), 32(4.484155e-44) 1593; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, 0.0, 1594; EG-NEXT: MOV T0.X, 0.0, 1595; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1596; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1597 %shl = shl i64 13830554455654793216, %a 1598 store i64 %shl, i64 addrspace(1)* %out, align 8 1599 ret void 1600} 1601 1602define amdgpu_kernel void @s_shl_inline_imm_0_5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { 1603; SI-LABEL: s_shl_inline_imm_0_5_i64: 1604; SI: ; %bb.0: 1605; SI-NEXT: s_load_dword s4, s[0:1], 0xd 1606; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1607; SI-NEXT: s_mov_b32 s3, 0xf000 1608; SI-NEXT: s_mov_b32 s2, -1 1609; SI-NEXT: s_waitcnt lgkmcnt(0) 1610; SI-NEXT: s_lshl_b64 s[4:5], 0.5, s4 1611; SI-NEXT: v_mov_b32_e32 v0, s4 1612; SI-NEXT: v_mov_b32_e32 v1, s5 1613; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1614; SI-NEXT: s_endpgm 1615; 1616; VI-LABEL: s_shl_inline_imm_0_5_i64: 1617; VI: ; %bb.0: 1618; VI-NEXT: s_load_dword s4, s[0:1], 0x34 1619; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1620; VI-NEXT: s_mov_b32 s3, 0xf000 1621; VI-NEXT: s_mov_b32 s2, -1 1622; VI-NEXT: s_waitcnt lgkmcnt(0) 1623; VI-NEXT: s_lshl_b64 s[4:5], 0.5, s4 1624; VI-NEXT: v_mov_b32_e32 v0, s4 1625; VI-NEXT: v_mov_b32_e32 v1, s5 1626; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1627; VI-NEXT: s_endpgm 1628; 1629; EG-LABEL: s_shl_inline_imm_0_5_i64: 1630; EG: ; %bb.0: 1631; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] 1632; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1633; EG-NEXT: CF_END 1634; EG-NEXT: PAD 1635; EG-NEXT: ALU clause starting at 4: 1636; EG-NEXT: NOT_INT * T0.W, KC0[2].W, 1637; EG-NEXT: BIT_ALIGN_INT T0.W, literal.x, 0.0, PV.W, 1638; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y, 1639; EG-NEXT: 535822336(1.016440e-19), 32(4.484155e-44) 1640; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, 0.0, 1641; EG-NEXT: MOV T0.X, 0.0, 1642; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1643; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1644 %shl = shl i64 4602678819172646912, %a 1645 store i64 %shl, i64 addrspace(1)* %out, align 8 1646 ret void 1647} 1648 1649define amdgpu_kernel void @s_shl_inline_imm_neg_0_5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { 1650; SI-LABEL: s_shl_inline_imm_neg_0_5_i64: 1651; SI: ; %bb.0: 1652; SI-NEXT: s_load_dword s4, s[0:1], 0xd 1653; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1654; SI-NEXT: s_mov_b32 s3, 0xf000 1655; SI-NEXT: s_mov_b32 s2, -1 1656; SI-NEXT: s_waitcnt lgkmcnt(0) 1657; SI-NEXT: s_lshl_b64 s[4:5], -0.5, s4 1658; SI-NEXT: v_mov_b32_e32 v0, s4 1659; SI-NEXT: v_mov_b32_e32 v1, s5 1660; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1661; SI-NEXT: s_endpgm 1662; 1663; VI-LABEL: s_shl_inline_imm_neg_0_5_i64: 1664; VI: ; %bb.0: 1665; VI-NEXT: s_load_dword s4, s[0:1], 0x34 1666; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1667; VI-NEXT: s_mov_b32 s3, 0xf000 1668; VI-NEXT: s_mov_b32 s2, -1 1669; VI-NEXT: s_waitcnt lgkmcnt(0) 1670; VI-NEXT: s_lshl_b64 s[4:5], -0.5, s4 1671; VI-NEXT: v_mov_b32_e32 v0, s4 1672; VI-NEXT: v_mov_b32_e32 v1, s5 1673; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1674; VI-NEXT: s_endpgm 1675; 1676; EG-LABEL: s_shl_inline_imm_neg_0_5_i64: 1677; EG: ; %bb.0: 1678; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] 1679; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1680; EG-NEXT: CF_END 1681; EG-NEXT: PAD 1682; EG-NEXT: ALU clause starting at 4: 1683; EG-NEXT: NOT_INT * T0.W, KC0[2].W, 1684; EG-NEXT: BIT_ALIGN_INT T0.W, literal.x, 0.0, PV.W, 1685; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y, 1686; EG-NEXT: 1609564160(3.458765e+19), 32(4.484155e-44) 1687; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, 0.0, 1688; EG-NEXT: MOV T0.X, 0.0, 1689; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1690; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1691 %shl = shl i64 13826050856027422720, %a 1692 store i64 %shl, i64 addrspace(1)* %out, align 8 1693 ret void 1694} 1695 1696define amdgpu_kernel void @s_shl_inline_imm_2_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { 1697; SI-LABEL: s_shl_inline_imm_2_0_i64: 1698; SI: ; %bb.0: 1699; SI-NEXT: s_load_dword s4, s[0:1], 0xd 1700; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1701; SI-NEXT: s_mov_b32 s3, 0xf000 1702; SI-NEXT: s_mov_b32 s2, -1 1703; SI-NEXT: s_waitcnt lgkmcnt(0) 1704; SI-NEXT: s_lshl_b64 s[4:5], 2.0, s4 1705; SI-NEXT: v_mov_b32_e32 v0, s4 1706; SI-NEXT: v_mov_b32_e32 v1, s5 1707; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1708; SI-NEXT: s_endpgm 1709; 1710; VI-LABEL: s_shl_inline_imm_2_0_i64: 1711; VI: ; %bb.0: 1712; VI-NEXT: s_load_dword s4, s[0:1], 0x34 1713; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1714; VI-NEXT: s_mov_b32 s3, 0xf000 1715; VI-NEXT: s_mov_b32 s2, -1 1716; VI-NEXT: s_waitcnt lgkmcnt(0) 1717; VI-NEXT: s_lshl_b64 s[4:5], 2.0, s4 1718; VI-NEXT: v_mov_b32_e32 v0, s4 1719; VI-NEXT: v_mov_b32_e32 v1, s5 1720; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1721; VI-NEXT: s_endpgm 1722; 1723; EG-LABEL: s_shl_inline_imm_2_0_i64: 1724; EG: ; %bb.0: 1725; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] 1726; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1727; EG-NEXT: CF_END 1728; EG-NEXT: PAD 1729; EG-NEXT: ALU clause starting at 4: 1730; EG-NEXT: NOT_INT * T0.W, KC0[2].W, 1731; EG-NEXT: BIT_ALIGN_INT T0.W, literal.x, 0.0, PV.W, 1732; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y, 1733; EG-NEXT: 536870912(1.084202e-19), 32(4.484155e-44) 1734; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, 0.0, 1735; EG-NEXT: MOV T0.X, 0.0, 1736; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1737; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1738 %shl = shl i64 4611686018427387904, %a 1739 store i64 %shl, i64 addrspace(1)* %out, align 8 1740 ret void 1741} 1742 1743define amdgpu_kernel void @s_shl_inline_imm_neg_2_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { 1744; SI-LABEL: s_shl_inline_imm_neg_2_0_i64: 1745; SI: ; %bb.0: 1746; SI-NEXT: s_load_dword s4, s[0:1], 0xd 1747; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1748; SI-NEXT: s_mov_b32 s3, 0xf000 1749; SI-NEXT: s_mov_b32 s2, -1 1750; SI-NEXT: s_waitcnt lgkmcnt(0) 1751; SI-NEXT: s_lshl_b64 s[4:5], -2.0, s4 1752; SI-NEXT: v_mov_b32_e32 v0, s4 1753; SI-NEXT: v_mov_b32_e32 v1, s5 1754; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1755; SI-NEXT: s_endpgm 1756; 1757; VI-LABEL: s_shl_inline_imm_neg_2_0_i64: 1758; VI: ; %bb.0: 1759; VI-NEXT: s_load_dword s4, s[0:1], 0x34 1760; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1761; VI-NEXT: s_mov_b32 s3, 0xf000 1762; VI-NEXT: s_mov_b32 s2, -1 1763; VI-NEXT: s_waitcnt lgkmcnt(0) 1764; VI-NEXT: s_lshl_b64 s[4:5], -2.0, s4 1765; VI-NEXT: v_mov_b32_e32 v0, s4 1766; VI-NEXT: v_mov_b32_e32 v1, s5 1767; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1768; VI-NEXT: s_endpgm 1769; 1770; EG-LABEL: s_shl_inline_imm_neg_2_0_i64: 1771; EG: ; %bb.0: 1772; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] 1773; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1774; EG-NEXT: CF_END 1775; EG-NEXT: PAD 1776; EG-NEXT: ALU clause starting at 4: 1777; EG-NEXT: NOT_INT * T0.W, KC0[2].W, 1778; EG-NEXT: BIT_ALIGN_INT T0.W, literal.x, 0.0, PV.W, 1779; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y, 1780; EG-NEXT: 1610612736(3.689349e+19), 32(4.484155e-44) 1781; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, 0.0, 1782; EG-NEXT: MOV T0.X, 0.0, 1783; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1784; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1785 %shl = shl i64 13835058055282163712, %a 1786 store i64 %shl, i64 addrspace(1)* %out, align 8 1787 ret void 1788} 1789 1790define amdgpu_kernel void @s_shl_inline_imm_4_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { 1791; SI-LABEL: s_shl_inline_imm_4_0_i64: 1792; SI: ; %bb.0: 1793; SI-NEXT: s_load_dword s4, s[0:1], 0xd 1794; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1795; SI-NEXT: s_mov_b32 s3, 0xf000 1796; SI-NEXT: s_mov_b32 s2, -1 1797; SI-NEXT: s_waitcnt lgkmcnt(0) 1798; SI-NEXT: s_lshl_b64 s[4:5], 4.0, s4 1799; SI-NEXT: v_mov_b32_e32 v0, s4 1800; SI-NEXT: v_mov_b32_e32 v1, s5 1801; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1802; SI-NEXT: s_endpgm 1803; 1804; VI-LABEL: s_shl_inline_imm_4_0_i64: 1805; VI: ; %bb.0: 1806; VI-NEXT: s_load_dword s4, s[0:1], 0x34 1807; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1808; VI-NEXT: s_mov_b32 s3, 0xf000 1809; VI-NEXT: s_mov_b32 s2, -1 1810; VI-NEXT: s_waitcnt lgkmcnt(0) 1811; VI-NEXT: s_lshl_b64 s[4:5], 4.0, s4 1812; VI-NEXT: v_mov_b32_e32 v0, s4 1813; VI-NEXT: v_mov_b32_e32 v1, s5 1814; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1815; VI-NEXT: s_endpgm 1816; 1817; EG-LABEL: s_shl_inline_imm_4_0_i64: 1818; EG: ; %bb.0: 1819; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] 1820; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1821; EG-NEXT: CF_END 1822; EG-NEXT: PAD 1823; EG-NEXT: ALU clause starting at 4: 1824; EG-NEXT: NOT_INT * T0.W, KC0[2].W, 1825; EG-NEXT: BIT_ALIGN_INT T0.W, literal.x, 0.0, PV.W, 1826; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y, 1827; EG-NEXT: 537395200(1.151965e-19), 32(4.484155e-44) 1828; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, 0.0, 1829; EG-NEXT: MOV T0.X, 0.0, 1830; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1831; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1832 %shl = shl i64 4616189618054758400, %a 1833 store i64 %shl, i64 addrspace(1)* %out, align 8 1834 ret void 1835} 1836 1837define amdgpu_kernel void @s_shl_inline_imm_neg_4_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { 1838; SI-LABEL: s_shl_inline_imm_neg_4_0_i64: 1839; SI: ; %bb.0: 1840; SI-NEXT: s_load_dword s4, s[0:1], 0xd 1841; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 1842; SI-NEXT: s_mov_b32 s3, 0xf000 1843; SI-NEXT: s_mov_b32 s2, -1 1844; SI-NEXT: s_waitcnt lgkmcnt(0) 1845; SI-NEXT: s_lshl_b64 s[4:5], -4.0, s4 1846; SI-NEXT: v_mov_b32_e32 v0, s4 1847; SI-NEXT: v_mov_b32_e32 v1, s5 1848; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1849; SI-NEXT: s_endpgm 1850; 1851; VI-LABEL: s_shl_inline_imm_neg_4_0_i64: 1852; VI: ; %bb.0: 1853; VI-NEXT: s_load_dword s4, s[0:1], 0x34 1854; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 1855; VI-NEXT: s_mov_b32 s3, 0xf000 1856; VI-NEXT: s_mov_b32 s2, -1 1857; VI-NEXT: s_waitcnt lgkmcnt(0) 1858; VI-NEXT: s_lshl_b64 s[4:5], -4.0, s4 1859; VI-NEXT: v_mov_b32_e32 v0, s4 1860; VI-NEXT: v_mov_b32_e32 v1, s5 1861; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 1862; VI-NEXT: s_endpgm 1863; 1864; EG-LABEL: s_shl_inline_imm_neg_4_0_i64: 1865; EG: ; %bb.0: 1866; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] 1867; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1868; EG-NEXT: CF_END 1869; EG-NEXT: PAD 1870; EG-NEXT: ALU clause starting at 4: 1871; EG-NEXT: NOT_INT * T0.W, KC0[2].W, 1872; EG-NEXT: BIT_ALIGN_INT T0.W, literal.x, 0.0, PV.W, 1873; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y, 1874; EG-NEXT: 1611137024(3.919933e+19), 32(4.484155e-44) 1875; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, 0.0, 1876; EG-NEXT: MOV T0.X, 0.0, 1877; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1878; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1879 %shl = shl i64 13839561654909534208, %a 1880 store i64 %shl, i64 addrspace(1)* %out, align 8 1881 ret void 1882} 1883 1884 1885; Test with the 64-bit integer bitpattern for a 32-bit float in the 1886; low 32-bits, which is not a valid 64-bit inline immmediate. 1887define amdgpu_kernel void @s_shl_inline_imm_f32_4_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { 1888; SI-LABEL: s_shl_inline_imm_f32_4_0_i64: 1889; SI: ; %bb.0: 1890; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1891; SI-NEXT: s_load_dword s2, s[0:1], 0xd 1892; SI-NEXT: s_mov_b64 s[0:1], 0x40800000 1893; SI-NEXT: s_mov_b32 s7, 0xf000 1894; SI-NEXT: s_mov_b32 s6, -1 1895; SI-NEXT: s_waitcnt lgkmcnt(0) 1896; SI-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 1897; SI-NEXT: v_mov_b32_e32 v0, s0 1898; SI-NEXT: v_mov_b32_e32 v1, s1 1899; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1900; SI-NEXT: s_endpgm 1901; 1902; VI-LABEL: s_shl_inline_imm_f32_4_0_i64: 1903; VI: ; %bb.0: 1904; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 1905; VI-NEXT: s_load_dword s2, s[0:1], 0x34 1906; VI-NEXT: s_mov_b64 s[0:1], 0x40800000 1907; VI-NEXT: s_mov_b32 s7, 0xf000 1908; VI-NEXT: s_mov_b32 s6, -1 1909; VI-NEXT: s_waitcnt lgkmcnt(0) 1910; VI-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 1911; VI-NEXT: v_mov_b32_e32 v0, s0 1912; VI-NEXT: v_mov_b32_e32 v1, s1 1913; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1914; VI-NEXT: s_endpgm 1915; 1916; EG-LABEL: s_shl_inline_imm_f32_4_0_i64: 1917; EG: ; %bb.0: 1918; EG-NEXT: ALU 11, @4, KC0[CB0:0-32], KC1[] 1919; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1920; EG-NEXT: CF_END 1921; EG-NEXT: PAD 1922; EG-NEXT: ALU clause starting at 4: 1923; EG-NEXT: NOT_INT T0.W, KC0[2].W, 1924; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.x, 1925; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00) 1926; EG-NEXT: LSHL T0.Z, literal.x, PS, 1927; EG-NEXT: BIT_ALIGN_INT T0.W, 0.0, literal.y, PV.W, 1928; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.z, 1929; EG-NEXT: 1082130432(4.000000e+00), 541065216(1.626303e-19) 1930; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) 1931; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, PV.Z, 1932; EG-NEXT: CNDE_INT T0.X, T1.W, T0.Z, 0.0, 1933; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1934; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1935 %shl = shl i64 1082130432, %a 1936 store i64 %shl, i64 addrspace(1)* %out, align 8 1937 ret void 1938} 1939 1940; FIXME: Copy of -1 register 1941define amdgpu_kernel void @s_shl_inline_imm_f32_neg_4_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { 1942; SI-LABEL: s_shl_inline_imm_f32_neg_4_0_i64: 1943; SI: ; %bb.0: 1944; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 1945; SI-NEXT: s_load_dword s2, s[0:1], 0xd 1946; SI-NEXT: s_mov_b32 s6, -1 1947; SI-NEXT: s_mov_b32 s0, -4.0 1948; SI-NEXT: s_mov_b32 s1, s6 1949; SI-NEXT: s_mov_b32 s7, 0xf000 1950; SI-NEXT: s_waitcnt lgkmcnt(0) 1951; SI-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 1952; SI-NEXT: v_mov_b32_e32 v0, s0 1953; SI-NEXT: v_mov_b32_e32 v1, s1 1954; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1955; SI-NEXT: s_endpgm 1956; 1957; VI-LABEL: s_shl_inline_imm_f32_neg_4_0_i64: 1958; VI: ; %bb.0: 1959; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 1960; VI-NEXT: s_load_dword s2, s[0:1], 0x34 1961; VI-NEXT: s_mov_b32 s6, -1 1962; VI-NEXT: s_mov_b32 s0, -4.0 1963; VI-NEXT: s_mov_b32 s1, s6 1964; VI-NEXT: s_mov_b32 s7, 0xf000 1965; VI-NEXT: s_waitcnt lgkmcnt(0) 1966; VI-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 1967; VI-NEXT: v_mov_b32_e32 v0, s0 1968; VI-NEXT: v_mov_b32_e32 v1, s1 1969; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 1970; VI-NEXT: s_endpgm 1971; 1972; EG-LABEL: s_shl_inline_imm_f32_neg_4_0_i64: 1973; EG: ; %bb.0: 1974; EG-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[] 1975; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 1976; EG-NEXT: CF_END 1977; EG-NEXT: PAD 1978; EG-NEXT: ALU clause starting at 4: 1979; EG-NEXT: AND_INT T0.Z, KC0[2].W, literal.x, 1980; EG-NEXT: MOV T0.W, literal.y, 1981; EG-NEXT: NOT_INT * T1.W, KC0[2].W, 1982; EG-NEXT: 31(4.344025e-44), -532676608(-5.534023e+19) 1983; EG-NEXT: BIT_ALIGN_INT T1.Z, literal.x, PV.W, PS, 1984; EG-NEXT: LSHL T0.W, literal.y, PV.Z, 1985; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.z, 1986; EG-NEXT: 2147483647(nan), -1065353216(-4.000000e+00) 1987; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00) 1988; EG-NEXT: CNDE_INT * T0.Y, PS, PV.Z, PV.W, 1989; EG-NEXT: CNDE_INT T0.X, T1.W, T0.W, 0.0, 1990; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 1991; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 1992 %shl = shl i64 -1065353216, %a 1993 store i64 %shl, i64 addrspace(1)* %out, align 8 1994 ret void 1995} 1996 1997define amdgpu_kernel void @s_shl_inline_high_imm_f32_4_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { 1998; SI-LABEL: s_shl_inline_high_imm_f32_4_0_i64: 1999; SI: ; %bb.0: 2000; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 2001; SI-NEXT: s_load_dword s2, s[0:1], 0xd 2002; SI-NEXT: s_mov_b32 s0, 0 2003; SI-NEXT: s_mov_b32 s1, 4.0 2004; SI-NEXT: s_mov_b32 s7, 0xf000 2005; SI-NEXT: s_mov_b32 s6, -1 2006; SI-NEXT: s_waitcnt lgkmcnt(0) 2007; SI-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 2008; SI-NEXT: v_mov_b32_e32 v0, s0 2009; SI-NEXT: v_mov_b32_e32 v1, s1 2010; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 2011; SI-NEXT: s_endpgm 2012; 2013; VI-LABEL: s_shl_inline_high_imm_f32_4_0_i64: 2014; VI: ; %bb.0: 2015; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 2016; VI-NEXT: s_load_dword s2, s[0:1], 0x34 2017; VI-NEXT: s_mov_b32 s0, 0 2018; VI-NEXT: s_mov_b32 s1, 4.0 2019; VI-NEXT: s_mov_b32 s7, 0xf000 2020; VI-NEXT: s_mov_b32 s6, -1 2021; VI-NEXT: s_waitcnt lgkmcnt(0) 2022; VI-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 2023; VI-NEXT: v_mov_b32_e32 v0, s0 2024; VI-NEXT: v_mov_b32_e32 v1, s1 2025; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 2026; VI-NEXT: s_endpgm 2027; 2028; EG-LABEL: s_shl_inline_high_imm_f32_4_0_i64: 2029; EG: ; %bb.0: 2030; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] 2031; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 2032; EG-NEXT: CF_END 2033; EG-NEXT: PAD 2034; EG-NEXT: ALU clause starting at 4: 2035; EG-NEXT: NOT_INT * T0.W, KC0[2].W, 2036; EG-NEXT: BIT_ALIGN_INT T0.W, literal.x, 0.0, PV.W, 2037; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y, 2038; EG-NEXT: 541065216(1.626303e-19), 32(4.484155e-44) 2039; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, 0.0, 2040; EG-NEXT: MOV T0.X, 0.0, 2041; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 2042; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2043 %shl = shl i64 4647714815446351872, %a 2044 store i64 %shl, i64 addrspace(1)* %out, align 8 2045 ret void 2046} 2047 2048define amdgpu_kernel void @s_shl_inline_high_imm_f32_neg_4_0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) { 2049; SI-LABEL: s_shl_inline_high_imm_f32_neg_4_0_i64: 2050; SI: ; %bb.0: 2051; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 2052; SI-NEXT: s_load_dword s2, s[0:1], 0xd 2053; SI-NEXT: s_mov_b32 s0, 0 2054; SI-NEXT: s_mov_b32 s1, -4.0 2055; SI-NEXT: s_mov_b32 s7, 0xf000 2056; SI-NEXT: s_mov_b32 s6, -1 2057; SI-NEXT: s_waitcnt lgkmcnt(0) 2058; SI-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 2059; SI-NEXT: v_mov_b32_e32 v0, s0 2060; SI-NEXT: v_mov_b32_e32 v1, s1 2061; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 2062; SI-NEXT: s_endpgm 2063; 2064; VI-LABEL: s_shl_inline_high_imm_f32_neg_4_0_i64: 2065; VI: ; %bb.0: 2066; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 2067; VI-NEXT: s_load_dword s2, s[0:1], 0x34 2068; VI-NEXT: s_mov_b32 s0, 0 2069; VI-NEXT: s_mov_b32 s1, -4.0 2070; VI-NEXT: s_mov_b32 s7, 0xf000 2071; VI-NEXT: s_mov_b32 s6, -1 2072; VI-NEXT: s_waitcnt lgkmcnt(0) 2073; VI-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 2074; VI-NEXT: v_mov_b32_e32 v0, s0 2075; VI-NEXT: v_mov_b32_e32 v1, s1 2076; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 2077; VI-NEXT: s_endpgm 2078; 2079; EG-LABEL: s_shl_inline_high_imm_f32_neg_4_0_i64: 2080; EG: ; %bb.0: 2081; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] 2082; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 2083; EG-NEXT: CF_END 2084; EG-NEXT: PAD 2085; EG-NEXT: ALU clause starting at 4: 2086; EG-NEXT: NOT_INT * T0.W, KC0[2].W, 2087; EG-NEXT: BIT_ALIGN_INT T0.W, literal.x, 0.0, PV.W, 2088; EG-NEXT: AND_INT * T1.W, KC0[2].W, literal.y, 2089; EG-NEXT: 1614807040(5.534023e+19), 32(4.484155e-44) 2090; EG-NEXT: CNDE_INT * T0.Y, PS, PV.W, 0.0, 2091; EG-NEXT: MOV T0.X, 0.0, 2092; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 2093; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2094 %shl = shl i64 13871086852301127680, %a 2095 store i64 %shl, i64 addrspace(1)* %out, align 8 2096 ret void 2097} 2098 2099define amdgpu_kernel void @test_mul2(i32 %p) { 2100; SI-LABEL: test_mul2: 2101; SI: ; %bb.0: 2102; SI-NEXT: s_load_dword s0, s[0:1], 0x9 2103; SI-NEXT: s_mov_b32 s3, 0xf000 2104; SI-NEXT: s_mov_b32 s2, -1 2105; SI-NEXT: s_waitcnt lgkmcnt(0) 2106; SI-NEXT: s_lshl_b32 s0, s0, 1 2107; SI-NEXT: v_mov_b32_e32 v0, s0 2108; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 2109; SI-NEXT: s_waitcnt vmcnt(0) 2110; SI-NEXT: s_endpgm 2111; 2112; VI-LABEL: test_mul2: 2113; VI: ; %bb.0: 2114; VI-NEXT: s_load_dword s0, s[0:1], 0x24 2115; VI-NEXT: s_mov_b32 s3, 0xf000 2116; VI-NEXT: s_mov_b32 s2, -1 2117; VI-NEXT: s_waitcnt lgkmcnt(0) 2118; VI-NEXT: s_lshl_b32 s0, s0, 1 2119; VI-NEXT: v_mov_b32_e32 v0, s0 2120; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0 2121; VI-NEXT: s_waitcnt vmcnt(0) 2122; VI-NEXT: s_endpgm 2123; 2124; EG-LABEL: test_mul2: 2125; EG: ; %bb.0: 2126; EG-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[] 2127; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1 2128; EG-NEXT: CF_END 2129; EG-NEXT: PAD 2130; EG-NEXT: ALU clause starting at 4: 2131; EG-NEXT: MOV T0.X, literal.x, 2132; EG-NEXT: LSHL * T1.X, KC0[2].Y, 1, 2133; EG-NEXT: 0(0.000000e+00), 0(0.000000e+00) 2134 %i = mul i32 %p, 2 2135 store volatile i32 %i, i32 addrspace(1)* undef 2136 ret void 2137} 2138 2139define void @shl_or_k(i32 addrspace(1)* %out, i32 %in) { 2140; SI-LABEL: shl_or_k: 2141; SI: ; %bb.0: 2142; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2143; SI-NEXT: s_mov_b32 s6, 0 2144; SI-NEXT: v_lshlrev_b32_e32 v2, 2, v2 2145; SI-NEXT: s_mov_b32 s7, 0xf000 2146; SI-NEXT: s_mov_b32 s4, s6 2147; SI-NEXT: s_mov_b32 s5, s6 2148; SI-NEXT: v_or_b32_e32 v2, 4, v2 2149; SI-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64 2150; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 2151; SI-NEXT: s_setpc_b64 s[30:31] 2152; 2153; VI-LABEL: shl_or_k: 2154; VI: ; %bb.0: 2155; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2156; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v2 2157; VI-NEXT: v_or_b32_e32 v2, 4, v2 2158; VI-NEXT: flat_store_dword v[0:1], v2 2159; VI-NEXT: s_waitcnt vmcnt(0) 2160; VI-NEXT: s_setpc_b64 s[30:31] 2161; 2162; EG-LABEL: shl_or_k: 2163; EG: ; %bb.0: 2164; EG-NEXT: ALU 4, @4, KC0[CB0:0-32], KC1[] 2165; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1 2166; EG-NEXT: CF_END 2167; EG-NEXT: PAD 2168; EG-NEXT: ALU clause starting at 4: 2169; EG-NEXT: LSHL * T0.W, KC0[2].Z, literal.x, 2170; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2171; EG-NEXT: OR_INT T0.X, PV.W, literal.x, 2172; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y, 2173; EG-NEXT: 4(5.605194e-45), 2(2.802597e-45) 2174 %tmp0 = or i32 %in, 1 2175 %tmp2 = shl i32 %tmp0, 2 2176 store i32 %tmp2, i32 addrspace(1)* %out 2177 ret void 2178} 2179 2180define void @shl_or_k_two_uses(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %in) { 2181; SI-LABEL: shl_or_k_two_uses: 2182; SI: ; %bb.0: 2183; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2184; SI-NEXT: s_mov_b32 s6, 0 2185; SI-NEXT: v_or_b32_e32 v4, 1, v4 2186; SI-NEXT: s_mov_b32 s7, 0xf000 2187; SI-NEXT: s_mov_b32 s4, s6 2188; SI-NEXT: s_mov_b32 s5, s6 2189; SI-NEXT: v_lshlrev_b32_e32 v5, 2, v4 2190; SI-NEXT: buffer_store_dword v5, v[0:1], s[4:7], 0 addr64 2191; SI-NEXT: buffer_store_dword v4, v[2:3], s[4:7], 0 addr64 2192; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 2193; SI-NEXT: s_setpc_b64 s[30:31] 2194; 2195; VI-LABEL: shl_or_k_two_uses: 2196; VI: ; %bb.0: 2197; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2198; VI-NEXT: v_or_b32_e32 v4, 1, v4 2199; VI-NEXT: v_lshlrev_b32_e32 v5, 2, v4 2200; VI-NEXT: flat_store_dword v[0:1], v5 2201; VI-NEXT: flat_store_dword v[2:3], v4 2202; VI-NEXT: s_waitcnt vmcnt(0) 2203; VI-NEXT: s_setpc_b64 s[30:31] 2204; 2205; EG-LABEL: shl_or_k_two_uses: 2206; EG: ; %bb.0: 2207; EG-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[] 2208; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T2.X, T3.X, 0 2209; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1 2210; EG-NEXT: CF_END 2211; EG-NEXT: ALU clause starting at 4: 2212; EG-NEXT: LSHR T0.X, KC0[2].Z, literal.x, 2213; EG-NEXT: OR_INT * T1.X, KC0[2].W, 1, 2214; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2215; EG-NEXT: LSHL T2.X, PS, literal.x, 2216; EG-NEXT: LSHR * T3.X, KC0[2].Y, literal.x, 2217; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00) 2218 %tmp0 = or i32 %in, 1 2219 %tmp2 = shl i32 %tmp0, 2 2220 store i32 %tmp2, i32 addrspace(1)* %out0 2221 store i32 %tmp0, i32 addrspace(1)* %out1 2222 ret void 2223} 2224 2225attributes #0 = { nounwind readnone } 2226