; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=SI %s
; XUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG %s

declare i32 @llvm.r600.read.tidig.x() #0


;EG: {{^}}shl_v2i32:
;EG: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}

;SI: {{^}}shl_v2i32:
;SI: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
;SI: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}

;VI: {{^}}shl_v2i32:
;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}

define void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
  %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
  %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
  %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
  %result = shl <2 x i32> %a, %b
  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
  ret void
}

;EG: {{^}}shl_v4i32:
;EG: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;EG: LSHL {{\*? *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}

;SI: {{^}}shl_v4i32:
;SI: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
;SI: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
;SI: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
;SI: v_lshl_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}

;VI: {{^}}shl_v4i32:
;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}
;VI: v_lshlrev_b32_e32 v{{[0-9]+, v[0-9]+, v[0-9]+}}

define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
  %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
  %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
  %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
  %result = shl <4 x i32> %a, %b
  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}shl_i16:
; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}

; VI: v_lshlrev_b16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define void @shl_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
  %b_ptr = getelementptr i16, i16 addrspace(1)* %in, i16 1
  %a = load i16, i16 addrspace(1)* %in
  %b = load i16, i16 addrspace(1)* %b_ptr
  %result = shl i16 %a, %b
  store i16 %result, i16 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}shl_i16_v_s:
; VI: v_lshlrev_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}

; VI: v_lshlrev_b16_e64 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}
define void @shl_i16_v_s(i16 addrspace(1)* %out, i16 addrspace(1)* %in, i16 %b) {
  %a = load i16, i16 addrspace(1)* %in
  %result = shl i16 %a, %b
  store i16 %result, i16 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}shl_i16_v_compute_s:
; SI: v_lshlrev_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}

; VI: v_lshlrev_b16_e64 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}
define void @shl_i16_v_compute_s(i16 addrspace(1)* %out, i16 addrspace(1)* %in, i16 %b) {
  %a = load i16, i16 addrspace(1)* %in
  %b.add = add i16 %b, 3
  %result = shl i16 %a, %b.add
  store i16 %result, i16 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}shl_i16_computed_amount:
; VI: v_add_u16_e32 [[ADD:v[0-9]+]], 3, v{{[0-9]+}}
; VI: v_lshlrev_b16_e32 v{{[0-9]+}}, [[ADD]], v{{[0-9]+}}
define void @shl_i16_computed_amount(i16 addrspace(1)* %out, i16 addrspace(1)* %in) {
  %tid = call i32 @llvm.r600.read.tidig.x() #0
  %gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr inbounds i16, i16 addrspace(1)* %out, i32 %tid
  %b_ptr = getelementptr i16, i16 addrspace(1)* %gep, i16 1
  %a = load volatile i16, i16 addrspace(1)* %in
  %b = load volatile i16, i16 addrspace(1)* %b_ptr
  %b.add = add i16 %b, 3
  %result = shl i16 %a, %b.add
  store i16 %result, i16 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}shl_i16_i_s:
; GCN: s_lshl_b32 s{{[0-9]+}}, s{{[0-9]+}}, 12
define void @shl_i16_i_s(i16 addrspace(1)* %out, i16 zeroext %a) {
  %result = shl i16 %a, 12
  store i16 %result, i16 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}shl_v2i16:
; VI: v_lshlrev_b16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_lshlrev_b16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define void @shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
  %tid = call i32 @llvm.r600.read.tidig.x() #0
  %gep = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %out, i32 %tid
  %b_ptr = getelementptr <2 x i16>, <2 x i16> addrspace(1)* %gep, i16 1
  %a = load <2 x i16>, <2 x i16> addrspace(1)* %in
  %b = load <2 x i16>, <2 x i16> addrspace(1)* %b_ptr
  %result = shl <2 x i16> %a, %b
  store <2 x i16> %result, <2 x i16> addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}shl_v4i16:
; VI: v_lshlrev_b16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_lshlrev_b16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_lshlrev_b16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: v_lshlrev_b16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
define void @shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
  %tid = call i32 @llvm.r600.read.tidig.x() #0
  %gep = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr inbounds <4 x i16>, <4 x i16> addrspace(1)* %out, i32 %tid
  %b_ptr = getelementptr <4 x i16>, <4 x i16> addrspace(1)* %gep, i16 1
  %a = load <4 x i16>, <4 x i16> addrspace(1)* %gep
  %b = load <4 x i16>, <4 x i16> addrspace(1)* %b_ptr
  %result = shl <4 x i16> %a, %b
  store <4 x i16> %result, <4 x i16> addrspace(1)* %gep.out
  ret void
}

;EG-LABEL: {{^}}shl_i64:
;EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
;EG: LSHR {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
;EG-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
;EG-DAG: LSHR {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
;EG-DAG: LSHL {{\*? *}}[[HISMTMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], [[SHIFT]]
;EG-DAG: OR_INT {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], {{[[HISMTMP]]|PV.[XYZW]|PS}}, {{[[OVERF]]|PV.[XYZW]}}
;EG-DAG: LSHL {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], [[OPLO]], {{PS|[[SHIFT]]|PV.[XYZW]}}
;EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
;EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}}
;EG-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0

; GCN-LABEL: {{^}}shl_i64:
; SI: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
; VI: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
  %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
  %a = load i64, i64 addrspace(1)* %in
  %b = load i64, i64 addrspace(1)* %b_ptr
  %result = shl i64 %a, %b
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

;EG-LABEL: {{^}}shl_v2i64:
;EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
;EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
;EG-DAG: LSHR {{\*? *}}[[COMPSHA]]
;EG-DAG: LSHR {{\*? *}}[[COMPSHB]]
;EG-DAG: LSHR {{.*}}, 1
;EG-DAG: LSHR {{.*}}, 1
;EG-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal
;EG-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal
;EG-DAG: LSHL {{.*}}, [[SHA]]
;EG-DAG: LSHL {{.*}}, [[SHB]]
;EG-DAG: LSHL {{.*}}, [[SHA]]
;EG-DAG: LSHL {{.*}}, [[SHB]]
;EG-DAG: LSHL
;EG-DAG: LSHL
;EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal
;EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal
;EG-DAG: CNDE_INT {{.*}}, 0.0
;EG-DAG: CNDE_INT {{.*}}, 0.0
;EG-DAG: CNDE_INT
;EG-DAG: CNDE_INT

;SI: {{^}}shl_v2i64:
;SI: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
;SI: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}

;VI: {{^}}shl_v2i64:
;VI: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
;VI: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}

define void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %in) {
  %b_ptr = getelementptr <2 x i64>, <2 x i64> addrspace(1)* %in, i64 1
  %a = load <2 x i64>, <2 x i64> addrspace(1)* %in
  %b = load <2 x i64>, <2 x i64> addrspace(1)* %b_ptr
  %result = shl <2 x i64> %a, %b
  store <2 x i64> %result, <2 x i64> addrspace(1)* %out
  ret void
}

;EG: {{^}}shl_v4i64:
;EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
;EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
;EG-DAG: SUB_INT {{\*? *}}[[COMPSHC:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHC:T[0-9]+\.[XYZW]]]
;EG-DAG: SUB_INT {{\*? *}}[[COMPSHD:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHD:T[0-9]+\.[XYZW]]]
;EG-DAG: LSHR {{\*? *}}[[COMPSHA]]
;EG-DAG: LSHR {{\*? *}}[[COMPSHB]]
;EG-DAG: LSHR {{\*? *}}[[COMPSHC]]
;EG-DAG: LSHR {{\*? *}}[[COMPSHD]]
;EG-DAG: LSHR {{.*}}, 1
;EG-DAG: LSHR {{.*}}, 1
;EG-DAG: LSHR {{.*}}, 1
;EG-DAG: LSHR {{.*}}, 1
;EG-DAG: ADD_INT {{\*? *}}[[BIGSHA:T[0-9]+\.[XYZW]]]{{.*}}, literal
;EG-DAG: ADD_INT {{\*? *}}[[BIGSHB:T[0-9]+\.[XYZW]]]{{.*}}, literal
;EG-DAG: ADD_INT {{\*? *}}[[BIGSHC:T[0-9]+\.[XYZW]]]{{.*}}, literal
;EG-DAG: ADD_INT {{\*? *}}[[BIGSHD:T[0-9]+\.[XYZW]]]{{.*}}, literal
;EG-DAG: LSHL {{.*}}, [[SHA]]
;EG-DAG: LSHL {{.*}}, [[SHB]]
;EG-DAG: LSHL {{.*}}, [[SHC]]
;EG-DAG: LSHL {{.*}}, [[SHD]]
;EG-DAG: LSHL {{.*}}, [[SHA]]
;EG-DAG: LSHL {{.*}}, [[SHB]]
;EG-DAG: LSHL {{.*}}, [[SHC]]
;EG-DAG: LSHL {{.*}}, [[SHD]]
;EG-DAG: LSHL
;EG-DAG: LSHL
;EG-DAG: LSHL
;EG-DAG: LSHL
;EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHA]], literal
;EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHB]], literal
;EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHC]], literal
;EG-DAG: SETGT_UINT {{\*? *T[0-9]\.[XYZW]}}, [[SHD]], literal
;EG-DAG: CNDE_INT {{.*}}, 0.0
;EG-DAG: CNDE_INT {{.*}}, 0.0
;EG-DAG: CNDE_INT {{.*}}, 0.0
;EG-DAG: CNDE_INT {{.*}}, 0.0
;EG-DAG: CNDE_INT
;EG-DAG: CNDE_INT
;EG-DAG: CNDE_INT
;EG-DAG: CNDE_INT

;SI: {{^}}shl_v4i64:
;SI: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
;SI: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
;SI: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}
;SI: v_lshl_b64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v[0-9]+}}

;VI: {{^}}shl_v4i64:
;VI: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
;VI: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
;VI: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}
;VI: v_lshlrev_b64 {{v\[[0-9]+:[0-9]+\], v[0-9]+, v\[[0-9]+:[0-9]+\]}}

define void @shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) {
  %b_ptr = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i64 1
  %a = load <4 x i64>, <4 x i64> addrspace(1)* %in
  %b = load <4 x i64>, <4 x i64> addrspace(1)* %b_ptr
  %result = shl <4 x i64> %a, %b
  store <4 x i64> %result, <4 x i64> addrspace(1)* %out
  ret void
}

; Make sure load width gets reduced to i32 load.
; GCN-LABEL: {{^}}s_shl_32_i64:
; GCN-DAG: s_load_dword [[LO_A:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb{{$}}
; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], 0{{$}}
; GCN-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], [[LO_A]]
; GCN: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
define void @s_shl_32_i64(i64 addrspace(1)* %out, i64 %a) {
  %result = shl i64 %a, 32
  store i64 %result, i64 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_shl_32_i64:
; GCN-DAG: buffer_load_dword v[[LO_A:[0-9]+]],
; GCN-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], 0{{$}}
; GCN: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[LO_A]]{{\]}}
define void @v_shl_32_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
  %tid = call i32 @llvm.r600.read.tidig.x() #0
  %gep.in = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
  %gep.out = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
  %a = load i64, i64 addrspace(1)* %gep.in
  %result = shl i64 %a, 32
  store i64 %result, i64 addrspace(1)* %gep.out
  ret void
}

; FUNC-LABEL: {{^}}s_shl_constant_i64
; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}}
define void @s_shl_constant_i64(i64 addrspace(1)* %out, i64 %a) {
  %shl = shl i64 281474976710655, %a
  store i64 %shl, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}v_shl_constant_i64:
; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]]
; SI-DAG: s_mov_b32 s[[KLO:[0-9]+]], 0xab19b207
; SI-DAG: s_movk_i32 s[[KHI:[0-9]+]], 0x11e{{$}}
; SI: v_lshl_b64 {{v\[[0-9]+:[0-9]+\]}}, s{{\[}}[[KLO]]:[[KHI]]{{\]}}, [[VAL]]
; SI: buffer_store_dwordx2
define void @v_shl_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
  %a = load i64, i64 addrspace(1)* %aptr, align 8
  %shl = shl i64 1231231234567, %a
  store i64 %shl, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}v_shl_i64_32_bit_constant:
; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]]
; SI-DAG: s_mov_b32 s[[KLO:[0-9]+]], 0x12d687{{$}}
; SI-DAG: s_mov_b32 s[[KHI:[0-9]+]], 0{{$}}
; SI: v_lshl_b64 {{v\[[0-9]+:[0-9]+\]}}, s{{\[}}[[KLO]]:[[KHI]]{{\]}}, [[VAL]]
define void @v_shl_i64_32_bit_constant(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
  %a = load i64, i64 addrspace(1)* %aptr, align 8
  %shl = shl i64 1234567, %a
  store i64 %shl, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}v_shl_inline_imm_64_i64:
; SI: v_lshl_b64 {{v\[[0-9]+:[0-9]+\]}}, 64, {{v[0-9]+}}
define void @v_shl_inline_imm_64_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
  %a = load i64, i64 addrspace(1)* %aptr, align 8
  %shl = shl i64 64, %a
  store i64 %shl, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}s_shl_inline_imm_64_i64:
; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, 64, s{{[0-9]+}}
define void @s_shl_inline_imm_64_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
  %shl = shl i64 64, %a
  store i64 %shl, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}s_shl_inline_imm_1_i64:
; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, 1, s{{[0-9]+}}
define void @s_shl_inline_imm_1_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
  %shl = shl i64 1, %a
  store i64 %shl, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}s_shl_inline_imm_1.0_i64:
; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, 1.0, s{{[0-9]+}}
define void @s_shl_inline_imm_1.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
  %shl = shl i64 4607182418800017408, %a
  store i64 %shl, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}s_shl_inline_imm_neg_1.0_i64:
; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, -1.0, s{{[0-9]+}}
define void @s_shl_inline_imm_neg_1.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
  %shl = shl i64 13830554455654793216, %a
  store i64 %shl, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}s_shl_inline_imm_0.5_i64:
; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, 0.5, s{{[0-9]+}}
define void @s_shl_inline_imm_0.5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
  %shl = shl i64 4602678819172646912, %a
  store i64 %shl, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}s_shl_inline_imm_neg_0.5_i64:
; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, -0.5, s{{[0-9]+}}
define void @s_shl_inline_imm_neg_0.5_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
  %shl = shl i64 13826050856027422720, %a
  store i64 %shl, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}s_shl_inline_imm_2.0_i64:
; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, 2.0, s{{[0-9]+}}
define void @s_shl_inline_imm_2.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
  %shl = shl i64 4611686018427387904, %a
  store i64 %shl, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}s_shl_inline_imm_neg_2.0_i64:
; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, -2.0, s{{[0-9]+}}
define void @s_shl_inline_imm_neg_2.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
  %shl = shl i64 13835058055282163712, %a
  store i64 %shl, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}s_shl_inline_imm_4.0_i64:
; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, 4.0, s{{[0-9]+}}
define void @s_shl_inline_imm_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
  %shl = shl i64 4616189618054758400, %a
  store i64 %shl, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}s_shl_inline_imm_neg_4.0_i64:
; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, -4.0, s{{[0-9]+}}
define void @s_shl_inline_imm_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
  %shl = shl i64 13839561654909534208, %a
  store i64 %shl, i64 addrspace(1)* %out, align 8
  ret void
}


; Test with the 64-bit integer bitpattern for a 32-bit float in the
; low 32-bits, which is not a valid 64-bit inline immmediate.

; FUNC-LABEL: {{^}}s_shl_inline_imm_f32_4.0_i64:
; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 4.0
; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0{{$}}
; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}, s{{[0-9]+}}
define void @s_shl_inline_imm_f32_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
  %shl = shl i64 1082130432, %a
  store i64 %shl, i64 addrspace(1)* %out, align 8
  ret void
}

; FIXME: Copy of -1 register
; FUNC-LABEL: {{^}}s_shl_inline_imm_f32_neg_4.0_i64:
; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], -4.0
; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], -1{{$}}
; SI-DAG: s_mov_b32 s[[K_HI_COPY:[0-9]+]], s[[K_HI]]
; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI_COPY]]{{\]}}, s{{[0-9]+}}
define void @s_shl_inline_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
  %shl = shl i64 -1065353216, %a
  store i64 %shl, i64 addrspace(1)* %out, align 8
  ret void
}

; Shift into upper 32-bits
; FUNC-LABEL: {{^}}s_shl_inline_high_imm_f32_4.0_i64:
; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 4.0
; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0{{$}}
; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}, s{{[0-9]+}}
define void @s_shl_inline_high_imm_f32_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
  %shl = shl i64 4647714815446351872, %a
  store i64 %shl, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}s_shl_inline_high_imm_f32_neg_4.0_i64:
; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], -4.0
; SI-DAG: s_mov_b32 s[[K_LO:[0-9]+]], 0{{$}}
; SI: s_lshl_b64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}, s{{[0-9]+}}
define void @s_shl_inline_high_imm_f32_neg_4.0_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
  %shl = shl i64 13871086852301127680, %a
  store i64 %shl, i64 addrspace(1)* %out, align 8
  ret void
}

attributes #0 = { nounwind readnone }