1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,GFX89,SI 3; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,GFX89,VI 4; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,GFX89,GFX9 5; RUN: llc < %s -march=r600 -mcpu=redwood -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,R600 6 7declare i32 @llvm.fshr.i32(i32, i32, i32) 8declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) 9declare <3 x i32> @llvm.fshr.v3i32(<3 x i32>, <3 x i32>, <3 x i32>) 10declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) 11declare i16 @llvm.fshr.i16(i16, i16, i16) 12declare <2 x i16> @llvm.fshr.v2i16(<2 x i16>, <2 x i16>, <2 x i16>) 13declare <3 x i16> @llvm.fshr.v3i16(<3 x i16>, <3 x i16>, <3 x i16>) 14declare <4 x i16> @llvm.fshr.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) 15declare i64 @llvm.fshr.i64(i64, i64, i64) 16declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) 17declare i24 @llvm.fshr.i24(i24, i24, i24) 18declare <2 x i24> @llvm.fshr.v2i24(<2 x i24>, <2 x i24>, <2 x i24>) 19 20define amdgpu_kernel void @fshr_i32(i32 addrspace(1)* %in, i32 %x, i32 %y, i32 %z) { 21; SI-LABEL: fshr_i32: 22; SI: ; %bb.0: ; %entry 23; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 24; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xb 25; SI-NEXT: s_mov_b32 s7, 0xf000 26; SI-NEXT: s_mov_b32 s6, -1 27; SI-NEXT: s_waitcnt lgkmcnt(0) 28; SI-NEXT: v_mov_b32_e32 v0, s1 29; SI-NEXT: v_mov_b32_e32 v1, s2 30; SI-NEXT: v_alignbit_b32 v0, s0, v0, v1 31; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 32; SI-NEXT: s_endpgm 33; 34; VI-LABEL: fshr_i32: 35; VI: ; %bb.0: ; %entry 36; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 37; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2c 38; VI-NEXT: s_waitcnt lgkmcnt(0) 39; VI-NEXT: v_mov_b32_e32 v0, s1 40; 
VI-NEXT: v_mov_b32_e32 v1, s2 41; VI-NEXT: v_alignbit_b32 v2, s0, v0, v1 42; VI-NEXT: v_mov_b32_e32 v0, s4 43; VI-NEXT: v_mov_b32_e32 v1, s5 44; VI-NEXT: flat_store_dword v[0:1], v2 45; VI-NEXT: s_endpgm 46; 47; GFX9-LABEL: fshr_i32: 48; GFX9: ; %bb.0: ; %entry 49; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 50; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2c 51; GFX9-NEXT: s_waitcnt lgkmcnt(0) 52; GFX9-NEXT: v_mov_b32_e32 v0, s1 53; GFX9-NEXT: v_mov_b32_e32 v1, s2 54; GFX9-NEXT: v_alignbit_b32 v2, s0, v0, v1 55; GFX9-NEXT: v_mov_b32_e32 v0, s4 56; GFX9-NEXT: v_mov_b32_e32 v1, s5 57; GFX9-NEXT: global_store_dword v[0:1], v2, off 58; GFX9-NEXT: s_endpgm 59; 60; R600-LABEL: fshr_i32: 61; R600: ; %bb.0: ; %entry 62; R600-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[] 63; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1 64; R600-NEXT: CF_END 65; R600-NEXT: PAD 66; R600-NEXT: ALU clause starting at 4: 67; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, 68; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) 69; R600-NEXT: BIT_ALIGN_INT * T1.X, KC0[2].Z, KC0[2].W, KC0[3].X, 70entry: 71 %0 = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z) 72 store i32 %0, i32 addrspace(1)* %in 73 ret void 74} 75 76define amdgpu_kernel void @fshr_i32_imm(i32 addrspace(1)* %in, i32 %x, i32 %y) { 77; SI-LABEL: fshr_i32_imm: 78; SI: ; %bb.0: ; %entry 79; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 80; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 81; SI-NEXT: s_mov_b32 s7, 0xf000 82; SI-NEXT: s_mov_b32 s6, -1 83; SI-NEXT: s_waitcnt lgkmcnt(0) 84; SI-NEXT: v_mov_b32_e32 v0, s1 85; SI-NEXT: v_alignbit_b32 v0, s0, v0, 7 86; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 87; SI-NEXT: s_endpgm 88; 89; VI-LABEL: fshr_i32_imm: 90; VI: ; %bb.0: ; %entry 91; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 92; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c 93; VI-NEXT: s_waitcnt lgkmcnt(0) 94; VI-NEXT: v_mov_b32_e32 v0, s1 95; VI-NEXT: v_alignbit_b32 v2, s0, v0, 7 96; VI-NEXT: v_mov_b32_e32 v0, s2 97; VI-NEXT: 
v_mov_b32_e32 v1, s3 98; VI-NEXT: flat_store_dword v[0:1], v2 99; VI-NEXT: s_endpgm 100; 101; GFX9-LABEL: fshr_i32_imm: 102; GFX9: ; %bb.0: ; %entry 103; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 104; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c 105; GFX9-NEXT: s_waitcnt lgkmcnt(0) 106; GFX9-NEXT: v_mov_b32_e32 v0, s1 107; GFX9-NEXT: v_alignbit_b32 v2, s0, v0, 7 108; GFX9-NEXT: v_mov_b32_e32 v0, s2 109; GFX9-NEXT: v_mov_b32_e32 v1, s3 110; GFX9-NEXT: global_store_dword v[0:1], v2, off 111; GFX9-NEXT: s_endpgm 112; 113; R600-LABEL: fshr_i32_imm: 114; R600: ; %bb.0: ; %entry 115; R600-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[] 116; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1 117; R600-NEXT: CF_END 118; R600-NEXT: PAD 119; R600-NEXT: ALU clause starting at 4: 120; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, 121; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) 122; R600-NEXT: BIT_ALIGN_INT * T1.X, KC0[2].Z, KC0[2].W, literal.x, 123; R600-NEXT: 7(9.809089e-45), 0(0.000000e+00) 124entry: 125 %0 = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 7) 126 store i32 %0, i32 addrspace(1)* %in 127 ret void 128} 129 130define amdgpu_kernel void @fshr_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) { 131; SI-LABEL: fshr_v2i32: 132; SI: ; %bb.0: ; %entry 133; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 134; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb 135; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd 136; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xf 137; SI-NEXT: s_mov_b32 s7, 0xf000 138; SI-NEXT: s_mov_b32 s6, -1 139; SI-NEXT: s_waitcnt lgkmcnt(0) 140; SI-NEXT: v_mov_b32_e32 v0, s9 141; SI-NEXT: v_mov_b32_e32 v1, s1 142; SI-NEXT: v_alignbit_b32 v1, s3, v0, v1 143; SI-NEXT: v_mov_b32_e32 v0, s8 144; SI-NEXT: v_mov_b32_e32 v2, s0 145; SI-NEXT: v_alignbit_b32 v0, s2, v0, v2 146; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 147; SI-NEXT: s_endpgm 148; 149; VI-LABEL: fshr_v2i32: 150; VI: ; %bb.0: ; %entry 151; VI-NEXT: s_load_dwordx2 s[2:3], 
s[0:1], 0x24 152; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c 153; VI-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x34 154; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x3c 155; VI-NEXT: s_waitcnt lgkmcnt(0) 156; VI-NEXT: v_mov_b32_e32 v0, s7 157; VI-NEXT: v_mov_b32_e32 v1, s1 158; VI-NEXT: v_alignbit_b32 v1, s5, v0, v1 159; VI-NEXT: v_mov_b32_e32 v0, s6 160; VI-NEXT: v_mov_b32_e32 v2, s0 161; VI-NEXT: v_alignbit_b32 v0, s4, v0, v2 162; VI-NEXT: v_mov_b32_e32 v2, s2 163; VI-NEXT: v_mov_b32_e32 v3, s3 164; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 165; VI-NEXT: s_endpgm 166; 167; GFX9-LABEL: fshr_v2i32: 168; GFX9: ; %bb.0: ; %entry 169; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 170; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c 171; GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x34 172; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x3c 173; GFX9-NEXT: s_waitcnt lgkmcnt(0) 174; GFX9-NEXT: v_mov_b32_e32 v0, s7 175; GFX9-NEXT: v_mov_b32_e32 v1, s1 176; GFX9-NEXT: v_alignbit_b32 v1, s5, v0, v1 177; GFX9-NEXT: v_mov_b32_e32 v0, s6 178; GFX9-NEXT: v_mov_b32_e32 v2, s0 179; GFX9-NEXT: v_alignbit_b32 v0, s4, v0, v2 180; GFX9-NEXT: v_mov_b32_e32 v2, s2 181; GFX9-NEXT: v_mov_b32_e32 v3, s3 182; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off 183; GFX9-NEXT: s_endpgm 184; 185; R600-LABEL: fshr_v2i32: 186; R600: ; %bb.0: ; %entry 187; R600-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[] 188; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 189; R600-NEXT: CF_END 190; R600-NEXT: PAD 191; R600-NEXT: ALU clause starting at 4: 192; R600-NEXT: MOV * T0.W, KC0[4].X, 193; R600-NEXT: BIT_ALIGN_INT T0.Y, KC0[3].X, KC0[3].Z, PV.W, 194; R600-NEXT: MOV * T0.W, KC0[3].W, 195; R600-NEXT: BIT_ALIGN_INT * T0.X, KC0[2].W, KC0[3].Y, PV.W, 196; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 197; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) 198entry: 199 %0 = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %z) 200 store <2 x i32> %0, <2 x i32> addrspace(1)* %in 201 ret void 202} 203 
204define amdgpu_kernel void @fshr_v2i32_imm(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y) { 205; SI-LABEL: fshr_v2i32_imm: 206; SI: ; %bb.0: ; %entry 207; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 208; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb 209; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd 210; SI-NEXT: s_mov_b32 s7, 0xf000 211; SI-NEXT: s_mov_b32 s6, -1 212; SI-NEXT: s_waitcnt lgkmcnt(0) 213; SI-NEXT: v_mov_b32_e32 v0, s1 214; SI-NEXT: v_alignbit_b32 v1, s3, v0, 9 215; SI-NEXT: v_mov_b32_e32 v0, s0 216; SI-NEXT: v_alignbit_b32 v0, s2, v0, 7 217; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 218; SI-NEXT: s_endpgm 219; 220; VI-LABEL: fshr_v2i32_imm: 221; VI: ; %bb.0: ; %entry 222; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 223; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c 224; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 225; VI-NEXT: s_waitcnt lgkmcnt(0) 226; VI-NEXT: v_mov_b32_e32 v0, s1 227; VI-NEXT: v_mov_b32_e32 v2, s0 228; VI-NEXT: v_alignbit_b32 v1, s5, v0, 9 229; VI-NEXT: v_alignbit_b32 v0, s4, v2, 7 230; VI-NEXT: v_mov_b32_e32 v2, s2 231; VI-NEXT: v_mov_b32_e32 v3, s3 232; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 233; VI-NEXT: s_endpgm 234; 235; GFX9-LABEL: fshr_v2i32_imm: 236; GFX9: ; %bb.0: ; %entry 237; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 238; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c 239; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 240; GFX9-NEXT: s_waitcnt lgkmcnt(0) 241; GFX9-NEXT: v_mov_b32_e32 v0, s1 242; GFX9-NEXT: v_mov_b32_e32 v2, s0 243; GFX9-NEXT: v_alignbit_b32 v1, s5, v0, 9 244; GFX9-NEXT: v_alignbit_b32 v0, s4, v2, 7 245; GFX9-NEXT: v_mov_b32_e32 v2, s2 246; GFX9-NEXT: v_mov_b32_e32 v3, s3 247; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off 248; GFX9-NEXT: s_endpgm 249; 250; R600-LABEL: fshr_v2i32_imm: 251; R600: ; %bb.0: ; %entry 252; R600-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[] 253; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 254; R600-NEXT: CF_END 255; R600-NEXT: PAD 256; 
R600-NEXT: ALU clause starting at 4: 257; R600-NEXT: BIT_ALIGN_INT * T0.Y, KC0[3].X, KC0[3].Z, literal.x, 258; R600-NEXT: 9(1.261169e-44), 0(0.000000e+00) 259; R600-NEXT: BIT_ALIGN_INT * T0.X, KC0[2].W, KC0[3].Y, literal.x, 260; R600-NEXT: 7(9.809089e-45), 0(0.000000e+00) 261; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 262; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) 263entry: 264 %0 = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 7, i32 9>) 265 store <2 x i32> %0, <2 x i32> addrspace(1)* %in 266 ret void 267} 268 269define amdgpu_kernel void @fshr_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { 270; SI-LABEL: fshr_v4i32: 271; SI: ; %bb.0: ; %entry 272; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 273; SI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0xd 274; SI-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x11 275; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x15 276; SI-NEXT: s_mov_b32 s7, 0xf000 277; SI-NEXT: s_mov_b32 s6, -1 278; SI-NEXT: s_waitcnt lgkmcnt(0) 279; SI-NEXT: v_mov_b32_e32 v0, s15 280; SI-NEXT: v_mov_b32_e32 v1, s3 281; SI-NEXT: v_alignbit_b32 v3, s11, v0, v1 282; SI-NEXT: v_mov_b32_e32 v0, s14 283; SI-NEXT: v_mov_b32_e32 v1, s2 284; SI-NEXT: v_alignbit_b32 v2, s10, v0, v1 285; SI-NEXT: v_mov_b32_e32 v0, s13 286; SI-NEXT: v_mov_b32_e32 v1, s1 287; SI-NEXT: v_alignbit_b32 v1, s9, v0, v1 288; SI-NEXT: v_mov_b32_e32 v0, s12 289; SI-NEXT: v_mov_b32_e32 v4, s0 290; SI-NEXT: v_alignbit_b32 v0, s8, v0, v4 291; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 292; SI-NEXT: s_endpgm 293; 294; VI-LABEL: fshr_v4i32: 295; VI: ; %bb.0: ; %entry 296; VI-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x24 297; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 298; VI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x44 299; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x54 300; VI-NEXT: s_waitcnt lgkmcnt(0) 301; VI-NEXT: v_mov_b32_e32 v0, s11 302; VI-NEXT: v_mov_b32_e32 v1, s3 303; VI-NEXT: v_alignbit_b32 v3, s7, v0, v1 304; VI-NEXT: 
v_mov_b32_e32 v0, s10 305; VI-NEXT: v_mov_b32_e32 v1, s2 306; VI-NEXT: v_alignbit_b32 v2, s6, v0, v1 307; VI-NEXT: v_mov_b32_e32 v0, s9 308; VI-NEXT: v_mov_b32_e32 v1, s1 309; VI-NEXT: v_alignbit_b32 v1, s5, v0, v1 310; VI-NEXT: v_mov_b32_e32 v0, s8 311; VI-NEXT: v_mov_b32_e32 v4, s0 312; VI-NEXT: v_alignbit_b32 v0, s4, v0, v4 313; VI-NEXT: v_mov_b32_e32 v4, s12 314; VI-NEXT: v_mov_b32_e32 v5, s13 315; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 316; VI-NEXT: s_endpgm 317; 318; GFX9-LABEL: fshr_v4i32: 319; GFX9: ; %bb.0: ; %entry 320; GFX9-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x24 321; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 322; GFX9-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x44 323; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x54 324; GFX9-NEXT: s_waitcnt lgkmcnt(0) 325; GFX9-NEXT: v_mov_b32_e32 v0, s11 326; GFX9-NEXT: v_mov_b32_e32 v1, s3 327; GFX9-NEXT: v_alignbit_b32 v3, s7, v0, v1 328; GFX9-NEXT: v_mov_b32_e32 v0, s10 329; GFX9-NEXT: v_mov_b32_e32 v1, s2 330; GFX9-NEXT: v_alignbit_b32 v2, s6, v0, v1 331; GFX9-NEXT: v_mov_b32_e32 v0, s9 332; GFX9-NEXT: v_mov_b32_e32 v1, s1 333; GFX9-NEXT: v_alignbit_b32 v1, s5, v0, v1 334; GFX9-NEXT: v_mov_b32_e32 v0, s8 335; GFX9-NEXT: v_mov_b32_e32 v4, s0 336; GFX9-NEXT: v_alignbit_b32 v0, s4, v0, v4 337; GFX9-NEXT: v_mov_b32_e32 v4, s12 338; GFX9-NEXT: v_mov_b32_e32 v5, s13 339; GFX9-NEXT: global_store_dwordx4 v[4:5], v[0:3], off 340; GFX9-NEXT: s_endpgm 341; 342; R600-LABEL: fshr_v4i32: 343; R600: ; %bb.0: ; %entry 344; R600-NEXT: ALU 9, @4, KC0[CB0:0-32], KC1[] 345; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1 346; R600-NEXT: CF_END 347; R600-NEXT: PAD 348; R600-NEXT: ALU clause starting at 4: 349; R600-NEXT: MOV * T0.W, KC0[6].X, 350; R600-NEXT: BIT_ALIGN_INT * T0.W, KC0[4].X, KC0[5].X, PV.W, 351; R600-NEXT: MOV * T1.W, KC0[5].W, 352; R600-NEXT: BIT_ALIGN_INT * T0.Z, KC0[3].W, KC0[4].W, PV.W, 353; R600-NEXT: MOV * T1.W, KC0[5].Z, 354; R600-NEXT: BIT_ALIGN_INT * T0.Y, KC0[3].Z, KC0[4].Z, PV.W, 355; 
R600-NEXT: MOV * T1.W, KC0[5].Y, 356; R600-NEXT: BIT_ALIGN_INT * T0.X, KC0[3].Y, KC0[4].Y, PV.W, 357; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 358; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) 359entry: 360 %0 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) 361 store <4 x i32> %0, <4 x i32> addrspace(1)* %in 362 ret void 363} 364 365define amdgpu_kernel void @fshr_v4i32_imm(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y) { 366; SI-LABEL: fshr_v4i32_imm: 367; SI: ; %bb.0: ; %entry 368; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 369; SI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0xd 370; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x11 371; SI-NEXT: s_mov_b32 s7, 0xf000 372; SI-NEXT: s_mov_b32 s6, -1 373; SI-NEXT: s_waitcnt lgkmcnt(0) 374; SI-NEXT: v_mov_b32_e32 v0, s3 375; SI-NEXT: v_alignbit_b32 v3, s11, v0, 1 376; SI-NEXT: v_mov_b32_e32 v0, s2 377; SI-NEXT: v_alignbit_b32 v2, s10, v0, 9 378; SI-NEXT: v_mov_b32_e32 v0, s1 379; SI-NEXT: v_alignbit_b32 v1, s9, v0, 7 380; SI-NEXT: v_mov_b32_e32 v0, s0 381; SI-NEXT: v_alignbit_b32 v0, s8, v0, 1 382; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 383; SI-NEXT: s_endpgm 384; 385; VI-LABEL: fshr_v4i32_imm: 386; VI: ; %bb.0: ; %entry 387; VI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24 388; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 389; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x44 390; VI-NEXT: s_waitcnt lgkmcnt(0) 391; VI-NEXT: v_mov_b32_e32 v4, s8 392; VI-NEXT: v_mov_b32_e32 v5, s9 393; VI-NEXT: v_mov_b32_e32 v0, s3 394; VI-NEXT: v_mov_b32_e32 v1, s2 395; VI-NEXT: v_alignbit_b32 v3, s7, v0, 1 396; VI-NEXT: v_mov_b32_e32 v0, s1 397; VI-NEXT: v_alignbit_b32 v2, s6, v1, 9 398; VI-NEXT: v_alignbit_b32 v1, s5, v0, 7 399; VI-NEXT: v_mov_b32_e32 v0, s0 400; VI-NEXT: v_alignbit_b32 v0, s4, v0, 1 401; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 402; VI-NEXT: s_endpgm 403; 404; GFX9-LABEL: fshr_v4i32_imm: 405; GFX9: ; %bb.0: ; %entry 406; GFX9-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24 407; 
GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 408; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x44 409; GFX9-NEXT: s_waitcnt lgkmcnt(0) 410; GFX9-NEXT: v_mov_b32_e32 v4, s8 411; GFX9-NEXT: v_mov_b32_e32 v5, s9 412; GFX9-NEXT: v_mov_b32_e32 v0, s3 413; GFX9-NEXT: v_mov_b32_e32 v1, s2 414; GFX9-NEXT: v_alignbit_b32 v3, s7, v0, 1 415; GFX9-NEXT: v_mov_b32_e32 v0, s1 416; GFX9-NEXT: v_alignbit_b32 v2, s6, v1, 9 417; GFX9-NEXT: v_alignbit_b32 v1, s5, v0, 7 418; GFX9-NEXT: v_mov_b32_e32 v0, s0 419; GFX9-NEXT: v_alignbit_b32 v0, s4, v0, 1 420; GFX9-NEXT: global_store_dwordx4 v[4:5], v[0:3], off 421; GFX9-NEXT: s_endpgm 422; 423; R600-LABEL: fshr_v4i32_imm: 424; R600: ; %bb.0: ; %entry 425; R600-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] 426; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1 427; R600-NEXT: CF_END 428; R600-NEXT: PAD 429; R600-NEXT: ALU clause starting at 4: 430; R600-NEXT: BIT_ALIGN_INT * T0.W, KC0[4].X, KC0[5].X, 1, 431; R600-NEXT: BIT_ALIGN_INT * T0.Z, KC0[3].W, KC0[4].W, literal.x, 432; R600-NEXT: 9(1.261169e-44), 0(0.000000e+00) 433; R600-NEXT: BIT_ALIGN_INT * T0.Y, KC0[3].Z, KC0[4].Z, literal.x, 434; R600-NEXT: 7(9.809089e-45), 0(0.000000e+00) 435; R600-NEXT: BIT_ALIGN_INT * T0.X, KC0[3].Y, KC0[4].Y, 1, 436; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 437; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) 438entry: 439 %0 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 7, i32 9, i32 33>) 440 store <4 x i32> %0, <4 x i32> addrspace(1)* %in 441 ret void 442} 443 444define i32 @v_fshr_i32(i32 %src0, i32 %src1, i32 %src2) { 445; GFX89-LABEL: v_fshr_i32: 446; GFX89: ; %bb.0: 447; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 448; GFX89-NEXT: v_alignbit_b32 v0, v0, v1, v2 449; GFX89-NEXT: s_setpc_b64 s[30:31] 450; 451; R600-LABEL: v_fshr_i32: 452; R600: ; %bb.0: 453; R600-NEXT: CF_END 454; R600-NEXT: PAD 455 %ret = call i32 @llvm.fshr.i32(i32 %src0, i32 %src1, i32 %src2) 456 ret i32 %ret 457} 458 459define <2 x i32> 
@v_fshr_v2i32(<2 x i32> %src0, <2 x i32> %src1, <2 x i32> %src2) { 460; GFX89-LABEL: v_fshr_v2i32: 461; GFX89: ; %bb.0: 462; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 463; GFX89-NEXT: v_alignbit_b32 v0, v0, v2, v4 464; GFX89-NEXT: v_alignbit_b32 v1, v1, v3, v5 465; GFX89-NEXT: s_setpc_b64 s[30:31] 466; 467; R600-LABEL: v_fshr_v2i32: 468; R600: ; %bb.0: 469; R600-NEXT: CF_END 470; R600-NEXT: PAD 471 %ret = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %src0, <2 x i32> %src1, <2 x i32> %src2) 472 ret <2 x i32> %ret 473} 474 475define <3 x i32> @v_fshr_v3i32(<3 x i32> %src0, <3 x i32> %src1, <3 x i32> %src2) { 476; GFX89-LABEL: v_fshr_v3i32: 477; GFX89: ; %bb.0: 478; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 479; GFX89-NEXT: v_alignbit_b32 v0, v0, v3, v6 480; GFX89-NEXT: v_alignbit_b32 v1, v1, v4, v7 481; GFX89-NEXT: v_alignbit_b32 v2, v2, v5, v8 482; GFX89-NEXT: s_setpc_b64 s[30:31] 483; 484; R600-LABEL: v_fshr_v3i32: 485; R600: ; %bb.0: 486; R600-NEXT: CF_END 487; R600-NEXT: PAD 488 %ret = call <3 x i32> @llvm.fshr.v3i32(<3 x i32> %src0, <3 x i32> %src1, <3 x i32> %src2) 489 ret <3 x i32> %ret 490} 491 492define <4 x i32> @v_fshr_v4i32(<4 x i32> %src0, <4 x i32> %src1, <4 x i32> %src2) { 493; GFX89-LABEL: v_fshr_v4i32: 494; GFX89: ; %bb.0: 495; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 496; GFX89-NEXT: v_alignbit_b32 v0, v0, v4, v8 497; GFX89-NEXT: v_alignbit_b32 v1, v1, v5, v9 498; GFX89-NEXT: v_alignbit_b32 v2, v2, v6, v10 499; GFX89-NEXT: v_alignbit_b32 v3, v3, v7, v11 500; GFX89-NEXT: s_setpc_b64 s[30:31] 501; 502; R600-LABEL: v_fshr_v4i32: 503; R600: ; %bb.0: 504; R600-NEXT: CF_END 505; R600-NEXT: PAD 506 %ret = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %src0, <4 x i32> %src1, <4 x i32> %src2) 507 ret <4 x i32> %ret 508} 509 510define i16 @v_fshr_i16(i16 %src0, i16 %src1, i16 %src2) { 511; SI-LABEL: v_fshr_i16: 512; SI: ; %bb.0: 513; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 514; SI-NEXT: v_or_b32_e32 v2, 16, v2 515; SI-NEXT: 
v_lshlrev_b32_e32 v1, 16, v1 516; SI-NEXT: v_alignbit_b32 v0, v0, v1, v2 517; SI-NEXT: s_setpc_b64 s[30:31] 518; 519; VI-LABEL: v_fshr_i16: 520; VI: ; %bb.0: 521; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 522; VI-NEXT: v_xor_b32_e32 v3, -1, v2 523; VI-NEXT: v_and_b32_e32 v2, 15, v2 524; VI-NEXT: v_lshlrev_b16_e32 v0, 1, v0 525; VI-NEXT: v_and_b32_e32 v3, 15, v3 526; VI-NEXT: v_lshlrev_b16_e32 v0, v3, v0 527; VI-NEXT: v_lshrrev_b16_e32 v1, v2, v1 528; VI-NEXT: v_or_b32_e32 v0, v0, v1 529; VI-NEXT: s_setpc_b64 s[30:31] 530; 531; GFX9-LABEL: v_fshr_i16: 532; GFX9: ; %bb.0: 533; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 534; GFX9-NEXT: v_xor_b32_e32 v3, -1, v2 535; GFX9-NEXT: v_and_b32_e32 v2, 15, v2 536; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0 537; GFX9-NEXT: v_and_b32_e32 v3, 15, v3 538; GFX9-NEXT: v_lshlrev_b16_e32 v0, v3, v0 539; GFX9-NEXT: v_lshrrev_b16_e32 v1, v2, v1 540; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 541; GFX9-NEXT: s_setpc_b64 s[30:31] 542; 543; R600-LABEL: v_fshr_i16: 544; R600: ; %bb.0: 545; R600-NEXT: CF_END 546; R600-NEXT: PAD 547 %ret = call i16 @llvm.fshr.i16(i16 %src0, i16 %src1, i16 %src2) 548 ret i16 %ret 549} 550 551define <2 x i16> @v_fshr_v2i16(<2 x i16> %src0, <2 x i16> %src1, <2 x i16> %src2) { 552; SI-LABEL: v_fshr_v2i16: 553; SI: ; %bb.0: 554; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 555; SI-NEXT: v_or_b32_e32 v5, 16, v5 556; SI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 557; SI-NEXT: v_alignbit_b32 v1, v1, v3, v5 558; SI-NEXT: v_or_b32_e32 v3, 16, v4 559; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 560; SI-NEXT: v_alignbit_b32 v0, v0, v2, v3 561; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 562; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0 563; SI-NEXT: v_or_b32_e32 v0, v0, v1 564; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 565; SI-NEXT: s_setpc_b64 s[30:31] 566; 567; VI-LABEL: v_fshr_v2i16: 568; VI: ; %bb.0: 569; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 570; VI-NEXT: v_lshrrev_b32_e32 v3, 16, v2 571; VI-NEXT: v_and_b32_e32 v4, 15, v3 572; 
VI-NEXT: v_mov_b32_e32 v5, 1 573; VI-NEXT: v_xor_b32_e32 v3, -1, v3 574; VI-NEXT: v_lshlrev_b16_sdwa v5, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 575; VI-NEXT: v_and_b32_e32 v3, 15, v3 576; VI-NEXT: v_lshrrev_b16_sdwa v4, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 577; VI-NEXT: v_lshlrev_b16_e32 v3, v3, v5 578; VI-NEXT: v_or_b32_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 579; VI-NEXT: v_xor_b32_e32 v4, -1, v2 580; VI-NEXT: v_and_b32_e32 v2, 15, v2 581; VI-NEXT: v_lshlrev_b16_e32 v0, 1, v0 582; VI-NEXT: v_and_b32_e32 v4, 15, v4 583; VI-NEXT: v_lshlrev_b16_e32 v0, v4, v0 584; VI-NEXT: v_lshrrev_b16_e32 v1, v2, v1 585; VI-NEXT: v_or_b32_e32 v0, v0, v1 586; VI-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 587; VI-NEXT: s_setpc_b64 s[30:31] 588; 589; GFX9-LABEL: v_fshr_v2i16: 590; GFX9: ; %bb.0: 591; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 592; GFX9-NEXT: v_xor_b32_e32 v3, -1, v2 593; GFX9-NEXT: s_mov_b32 s4, 0xf000f 594; GFX9-NEXT: v_and_b32_e32 v2, s4, v2 595; GFX9-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] 596; GFX9-NEXT: v_and_b32_e32 v3, s4, v3 597; GFX9-NEXT: v_pk_lshlrev_b16 v0, v3, v0 598; GFX9-NEXT: v_pk_lshrrev_b16 v1, v2, v1 599; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 600; GFX9-NEXT: s_setpc_b64 s[30:31] 601; 602; R600-LABEL: v_fshr_v2i16: 603; R600: ; %bb.0: 604; R600-NEXT: CF_END 605; R600-NEXT: PAD 606 %ret = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %src0, <2 x i16> %src1, <2 x i16> %src2) 607 ret <2 x i16> %ret 608} 609 610define <3 x i16> @v_fshr_v3i16(<3 x i16> %src0, <3 x i16> %src1, <3 x i16> %src2) { 611; SI-LABEL: v_fshr_v3i16: 612; SI: ; %bb.0: 613; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 614; SI-NEXT: v_or_b32_e32 v7, 16, v7 615; SI-NEXT: v_lshlrev_b32_e32 v4, 16, v4 616; SI-NEXT: v_alignbit_b32 v1, v1, v4, v7 617; SI-NEXT: v_or_b32_e32 v4, 16, v6 618; SI-NEXT: 
v_lshlrev_b32_e32 v3, 16, v3 619; SI-NEXT: v_alignbit_b32 v0, v0, v3, v4 620; SI-NEXT: s_mov_b32 s4, 0xffff 621; SI-NEXT: v_or_b32_e32 v3, 16, v8 622; SI-NEXT: v_lshlrev_b32_e32 v4, 16, v5 623; SI-NEXT: v_alignbit_b32 v3, v2, v4, v3 624; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 625; SI-NEXT: v_and_b32_e32 v0, s4, v0 626; SI-NEXT: v_or_b32_e32 v0, v0, v1 627; SI-NEXT: v_and_b32_e32 v2, s4, v3 628; SI-NEXT: v_alignbit_b32 v1, v3, v1, 16 629; SI-NEXT: s_setpc_b64 s[30:31] 630; 631; VI-LABEL: v_fshr_v3i16: 632; VI: ; %bb.0: 633; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 634; VI-NEXT: v_lshrrev_b32_e32 v6, 16, v4 635; VI-NEXT: v_and_b32_e32 v7, 15, v6 636; VI-NEXT: v_mov_b32_e32 v8, 1 637; VI-NEXT: v_xor_b32_e32 v6, -1, v6 638; VI-NEXT: v_lshlrev_b16_sdwa v8, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 639; VI-NEXT: v_and_b32_e32 v6, 15, v6 640; VI-NEXT: v_lshrrev_b16_sdwa v7, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 641; VI-NEXT: v_lshlrev_b16_e32 v6, v6, v8 642; VI-NEXT: v_or_b32_sdwa v6, v6, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 643; VI-NEXT: v_xor_b32_e32 v7, -1, v5 644; VI-NEXT: v_and_b32_e32 v5, 15, v5 645; VI-NEXT: v_lshlrev_b16_e32 v1, 1, v1 646; VI-NEXT: v_and_b32_e32 v7, 15, v7 647; VI-NEXT: v_lshlrev_b16_e32 v1, v7, v1 648; VI-NEXT: v_lshrrev_b16_e32 v3, v5, v3 649; VI-NEXT: v_or_b32_e32 v1, v1, v3 650; VI-NEXT: v_xor_b32_e32 v3, -1, v4 651; VI-NEXT: v_lshlrev_b16_e32 v0, 1, v0 652; VI-NEXT: v_and_b32_e32 v3, 15, v3 653; VI-NEXT: v_lshlrev_b16_e32 v0, v3, v0 654; VI-NEXT: v_and_b32_e32 v3, 15, v4 655; VI-NEXT: v_lshrrev_b16_e32 v2, v3, v2 656; VI-NEXT: v_or_b32_e32 v0, v0, v2 657; VI-NEXT: v_or_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 658; VI-NEXT: s_setpc_b64 s[30:31] 659; 660; GFX9-LABEL: v_fshr_v3i16: 661; GFX9: ; %bb.0: 662; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 663; GFX9-NEXT: v_lshrrev_b32_e32 v6, 
16, v4 664; GFX9-NEXT: v_and_b32_e32 v7, 15, v6 665; GFX9-NEXT: v_mov_b32_e32 v8, 1 666; GFX9-NEXT: v_xor_b32_e32 v6, -1, v6 667; GFX9-NEXT: v_lshlrev_b16_sdwa v8, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 668; GFX9-NEXT: v_and_b32_e32 v6, 15, v6 669; GFX9-NEXT: v_lshrrev_b16_sdwa v7, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 670; GFX9-NEXT: v_lshlrev_b16_e32 v6, v6, v8 671; GFX9-NEXT: v_or_b32_e32 v6, v6, v7 672; GFX9-NEXT: v_xor_b32_e32 v7, -1, v5 673; GFX9-NEXT: v_and_b32_e32 v5, 15, v5 674; GFX9-NEXT: v_lshlrev_b16_e32 v1, 1, v1 675; GFX9-NEXT: v_and_b32_e32 v7, 15, v7 676; GFX9-NEXT: v_lshlrev_b16_e32 v1, v7, v1 677; GFX9-NEXT: v_lshrrev_b16_e32 v3, v5, v3 678; GFX9-NEXT: v_or_b32_e32 v1, v1, v3 679; GFX9-NEXT: v_xor_b32_e32 v3, -1, v4 680; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0 681; GFX9-NEXT: v_and_b32_e32 v3, 15, v3 682; GFX9-NEXT: v_lshlrev_b16_e32 v0, v3, v0 683; GFX9-NEXT: v_and_b32_e32 v3, 15, v4 684; GFX9-NEXT: v_lshrrev_b16_e32 v2, v3, v2 685; GFX9-NEXT: v_or_b32_e32 v0, v0, v2 686; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0 687; GFX9-NEXT: v_lshl_or_b32 v0, v6, 16, v0 688; GFX9-NEXT: s_setpc_b64 s[30:31] 689; 690; R600-LABEL: v_fshr_v3i16: 691; R600: ; %bb.0: 692; R600-NEXT: CF_END 693; R600-NEXT: PAD 694 %ret = call <3 x i16> @llvm.fshr.v3i16(<3 x i16> %src0, <3 x i16> %src1, <3 x i16> %src2) 695 ret <3 x i16> %ret 696} 697 698define <4 x i16> @v_fshr_v4i16(<4 x i16> %src0, <4 x i16> %src1, <4 x i16> %src2) { 699; SI-LABEL: v_fshr_v4i16: 700; SI: ; %bb.0: 701; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 702; SI-NEXT: v_or_b32_e32 v9, 16, v9 703; SI-NEXT: v_lshlrev_b32_e32 v5, 16, v5 704; SI-NEXT: v_alignbit_b32 v1, v1, v5, v9 705; SI-NEXT: v_or_b32_e32 v5, 16, v8 706; SI-NEXT: v_lshlrev_b32_e32 v4, 16, v4 707; SI-NEXT: v_alignbit_b32 v0, v0, v4, v5 708; SI-NEXT: v_or_b32_e32 v4, 16, v11 709; SI-NEXT: v_lshlrev_b32_e32 v5, 16, v7 710; SI-NEXT: v_alignbit_b32 v3, v3, v5, v4 711; 
SI-NEXT: v_or_b32_e32 v4, 16, v10 712; SI-NEXT: v_lshlrev_b32_e32 v5, 16, v6 713; SI-NEXT: s_mov_b32 s4, 0xffff 714; SI-NEXT: v_alignbit_b32 v2, v2, v5, v4 715; SI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 716; SI-NEXT: v_and_b32_e32 v2, s4, v2 717; SI-NEXT: v_or_b32_e32 v2, v2, v3 718; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 719; SI-NEXT: v_and_b32_e32 v0, s4, v0 720; SI-NEXT: v_or_b32_e32 v0, v0, v1 721; SI-NEXT: v_alignbit_b32 v1, v2, v1, 16 722; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v2 723; SI-NEXT: s_setpc_b64 s[30:31] 724; 725; VI-LABEL: v_fshr_v4i16: 726; VI: ; %bb.0: 727; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 728; VI-NEXT: v_lshrrev_b32_e32 v6, 16, v5 729; VI-NEXT: v_and_b32_e32 v7, 15, v6 730; VI-NEXT: v_xor_b32_e32 v6, -1, v6 731; VI-NEXT: v_mov_b32_e32 v8, 1 732; VI-NEXT: v_lshlrev_b16_sdwa v9, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 733; VI-NEXT: v_and_b32_e32 v6, 15, v6 734; VI-NEXT: v_lshrrev_b16_sdwa v7, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 735; VI-NEXT: v_lshlrev_b16_e32 v6, v6, v9 736; VI-NEXT: v_or_b32_sdwa v6, v6, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 737; VI-NEXT: v_lshrrev_b32_e32 v7, 16, v4 738; VI-NEXT: v_and_b32_e32 v9, 15, v7 739; VI-NEXT: v_xor_b32_e32 v7, -1, v7 740; VI-NEXT: v_lshlrev_b16_sdwa v8, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 741; VI-NEXT: v_and_b32_e32 v7, 15, v7 742; VI-NEXT: v_lshlrev_b16_e32 v7, v7, v8 743; VI-NEXT: v_xor_b32_e32 v8, -1, v5 744; VI-NEXT: v_and_b32_e32 v5, 15, v5 745; VI-NEXT: v_lshlrev_b16_e32 v1, 1, v1 746; VI-NEXT: v_and_b32_e32 v8, 15, v8 747; VI-NEXT: v_lshlrev_b16_e32 v1, v8, v1 748; VI-NEXT: v_lshrrev_b16_e32 v3, v5, v3 749; VI-NEXT: v_or_b32_e32 v1, v1, v3 750; VI-NEXT: v_xor_b32_e32 v3, -1, v4 751; VI-NEXT: v_lshlrev_b16_e32 v0, 1, v0 752; VI-NEXT: v_and_b32_e32 v3, 15, v3 753; VI-NEXT: v_lshlrev_b16_e32 v0, v3, v0 754; VI-NEXT: v_and_b32_e32 v3, 15, v4 755; 
VI-NEXT: v_lshrrev_b16_sdwa v9, v9, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 756; VI-NEXT: v_lshrrev_b16_e32 v2, v3, v2 757; VI-NEXT: v_or_b32_sdwa v7, v7, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 758; VI-NEXT: v_or_b32_e32 v0, v0, v2 759; VI-NEXT: v_or_b32_sdwa v0, v0, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 760; VI-NEXT: v_or_b32_sdwa v1, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 761; VI-NEXT: s_setpc_b64 s[30:31] 762; 763; GFX9-LABEL: v_fshr_v4i16: 764; GFX9: ; %bb.0: 765; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 766; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v5 767; GFX9-NEXT: v_and_b32_e32 v7, 15, v6 768; GFX9-NEXT: v_xor_b32_e32 v6, -1, v6 769; GFX9-NEXT: v_mov_b32_e32 v8, 1 770; GFX9-NEXT: v_lshlrev_b16_sdwa v9, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 771; GFX9-NEXT: v_and_b32_e32 v6, 15, v6 772; GFX9-NEXT: v_lshrrev_b16_sdwa v7, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 773; GFX9-NEXT: v_lshlrev_b16_e32 v6, v6, v9 774; GFX9-NEXT: v_or_b32_e32 v6, v6, v7 775; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v4 776; GFX9-NEXT: v_and_b32_e32 v9, 15, v7 777; GFX9-NEXT: v_xor_b32_e32 v7, -1, v7 778; GFX9-NEXT: v_lshlrev_b16_sdwa v8, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 779; GFX9-NEXT: v_and_b32_e32 v7, 15, v7 780; GFX9-NEXT: v_lshlrev_b16_e32 v7, v7, v8 781; GFX9-NEXT: v_xor_b32_e32 v8, -1, v5 782; GFX9-NEXT: v_and_b32_e32 v5, 15, v5 783; GFX9-NEXT: v_lshlrev_b16_e32 v1, 1, v1 784; GFX9-NEXT: v_and_b32_e32 v8, 15, v8 785; GFX9-NEXT: v_lshlrev_b16_e32 v1, v8, v1 786; GFX9-NEXT: v_lshrrev_b16_e32 v3, v5, v3 787; GFX9-NEXT: v_or_b32_e32 v1, v1, v3 788; GFX9-NEXT: v_xor_b32_e32 v3, -1, v4 789; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0 790; GFX9-NEXT: v_and_b32_e32 v3, 15, v3 791; GFX9-NEXT: v_lshlrev_b16_e32 v0, v3, v0 792; GFX9-NEXT: v_and_b32_e32 v3, 15, 
v4 793; GFX9-NEXT: v_lshrrev_b16_sdwa v9, v9, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 794; GFX9-NEXT: v_lshrrev_b16_e32 v2, v3, v2 795; GFX9-NEXT: v_or_b32_e32 v0, v0, v2 796; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff 797; GFX9-NEXT: v_and_b32_e32 v1, v2, v1 798; GFX9-NEXT: v_or_b32_e32 v7, v7, v9 799; GFX9-NEXT: v_and_b32_e32 v0, v2, v0 800; GFX9-NEXT: v_lshl_or_b32 v0, v7, 16, v0 801; GFX9-NEXT: v_lshl_or_b32 v1, v6, 16, v1 802; GFX9-NEXT: s_setpc_b64 s[30:31] 803; 804; R600-LABEL: v_fshr_v4i16: 805; R600: ; %bb.0: 806; R600-NEXT: CF_END 807; R600-NEXT: PAD 808 %ret = call <4 x i16> @llvm.fshr.v4i16(<4 x i16> %src0, <4 x i16> %src1, <4 x i16> %src2) 809 ret <4 x i16> %ret 810} 811 812define i64 @v_fshr_i64(i64 %src0, i64 %src1, i64 %src2) { 813; SI-LABEL: v_fshr_i64: 814; SI: ; %bb.0: 815; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 816; SI-NEXT: v_and_b32_e32 v5, 63, v4 817; SI-NEXT: v_not_b32_e32 v4, v4 818; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], 1 819; SI-NEXT: v_and_b32_e32 v4, 63, v4 820; SI-NEXT: v_lshr_b64 v[2:3], v[2:3], v5 821; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], v4 822; SI-NEXT: v_or_b32_e32 v1, v1, v3 823; SI-NEXT: v_or_b32_e32 v0, v0, v2 824; SI-NEXT: s_setpc_b64 s[30:31] 825; 826; VI-LABEL: v_fshr_i64: 827; VI: ; %bb.0: 828; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 829; VI-NEXT: v_and_b32_e32 v5, 63, v4 830; VI-NEXT: v_not_b32_e32 v4, v4 831; VI-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 832; VI-NEXT: v_and_b32_e32 v4, 63, v4 833; VI-NEXT: v_lshrrev_b64 v[2:3], v5, v[2:3] 834; VI-NEXT: v_lshlrev_b64 v[0:1], v4, v[0:1] 835; VI-NEXT: v_or_b32_e32 v1, v1, v3 836; VI-NEXT: v_or_b32_e32 v0, v0, v2 837; VI-NEXT: s_setpc_b64 s[30:31] 838; 839; GFX9-LABEL: v_fshr_i64: 840; GFX9: ; %bb.0: 841; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 842; GFX9-NEXT: v_and_b32_e32 v5, 63, v4 843; GFX9-NEXT: v_not_b32_e32 v4, v4 844; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 845; GFX9-NEXT: v_and_b32_e32 v4, 63, v4 846; GFX9-NEXT: 
v_lshrrev_b64 v[2:3], v5, v[2:3] 847; GFX9-NEXT: v_lshlrev_b64 v[0:1], v4, v[0:1] 848; GFX9-NEXT: v_or_b32_e32 v1, v1, v3 849; GFX9-NEXT: v_or_b32_e32 v0, v0, v2 850; GFX9-NEXT: s_setpc_b64 s[30:31] 851; 852; R600-LABEL: v_fshr_i64: 853; R600: ; %bb.0: 854; R600-NEXT: CF_END 855; R600-NEXT: PAD 856 %ret = call i64 @llvm.fshr.i64(i64 %src0, i64 %src1, i64 %src2) 857 ret i64 %ret 858} 859 ; v_fshr_v2i64: funnel shift right on a two-element i64 vector, result returned by value. The SI/VI/GFX9 checks expect each element to be expanded separately into a 64-bit shift left by 1, a shift left by (not amt, masked with 63), a shift right by (amt masked with 63) and a pair of 32-bit ORs, with the two elements' instructions interleaved. The R600 checks only match CF_END and PAD. 860define <2 x i64> @v_fshr_v2i64(<2 x i64> %src0, <2 x i64> %src1, <2 x i64> %src2) { 861; SI-LABEL: v_fshr_v2i64: 862; SI: ; %bb.0: 863; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 864; SI-NEXT: v_and_b32_e32 v9, 63, v8 865; SI-NEXT: v_not_b32_e32 v8, v8 866; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], 1 867; SI-NEXT: v_and_b32_e32 v8, 63, v8 868; SI-NEXT: v_lshr_b64 v[4:5], v[4:5], v9 869; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], v8 870; SI-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 871; SI-NEXT: v_or_b32_e32 v1, v1, v5 872; SI-NEXT: v_and_b32_e32 v5, 63, v10 873; SI-NEXT: v_lshr_b64 v[5:6], v[6:7], v5 874; SI-NEXT: v_not_b32_e32 v7, v10 875; SI-NEXT: v_and_b32_e32 v7, 63, v7 876; SI-NEXT: v_lshl_b64 v[2:3], v[2:3], v7 877; SI-NEXT: v_or_b32_e32 v0, v0, v4 878; SI-NEXT: v_or_b32_e32 v3, v3, v6 879; SI-NEXT: v_or_b32_e32 v2, v2, v5 880; SI-NEXT: s_setpc_b64 s[30:31] 881; 882; VI-LABEL: v_fshr_v2i64: 883; VI: ; %bb.0: 884; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 885; VI-NEXT: v_and_b32_e32 v9, 63, v8 886; VI-NEXT: v_not_b32_e32 v8, v8 887; VI-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 888; VI-NEXT: v_and_b32_e32 v8, 63, v8 889; VI-NEXT: v_lshrrev_b64 v[4:5], v9, v[4:5] 890; VI-NEXT: v_lshlrev_b64 v[0:1], v8, v[0:1] 891; VI-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 892; VI-NEXT: v_or_b32_e32 v1, v1, v5 893; VI-NEXT: v_and_b32_e32 v5, 63, v10 894; VI-NEXT: v_lshrrev_b64 v[5:6], v5, v[6:7] 895; VI-NEXT: v_not_b32_e32 v7, v10 896; VI-NEXT: v_and_b32_e32 v7, 63, v7 897; VI-NEXT: v_lshlrev_b64 v[2:3], v7, v[2:3] 898; VI-NEXT: v_or_b32_e32 v0, v0, v4 899; VI-NEXT: v_or_b32_e32 v3, v3, v6 900; VI-NEXT: 
v_or_b32_e32 v2, v2, v5 901; VI-NEXT: s_setpc_b64 s[30:31] 902; 903; GFX9-LABEL: v_fshr_v2i64: 904; GFX9: ; %bb.0: 905; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 906; GFX9-NEXT: v_and_b32_e32 v9, 63, v8 907; GFX9-NEXT: v_not_b32_e32 v8, v8 908; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 909; GFX9-NEXT: v_and_b32_e32 v8, 63, v8 910; GFX9-NEXT: v_lshrrev_b64 v[4:5], v9, v[4:5] 911; GFX9-NEXT: v_lshlrev_b64 v[0:1], v8, v[0:1] 912; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 913; GFX9-NEXT: v_or_b32_e32 v1, v1, v5 914; GFX9-NEXT: v_and_b32_e32 v5, 63, v10 915; GFX9-NEXT: v_lshrrev_b64 v[5:6], v5, v[6:7] 916; GFX9-NEXT: v_not_b32_e32 v7, v10 917; GFX9-NEXT: v_and_b32_e32 v7, 63, v7 918; GFX9-NEXT: v_lshlrev_b64 v[2:3], v7, v[2:3] 919; GFX9-NEXT: v_or_b32_e32 v0, v0, v4 920; GFX9-NEXT: v_or_b32_e32 v3, v3, v6 921; GFX9-NEXT: v_or_b32_e32 v2, v2, v5 922; GFX9-NEXT: s_setpc_b64 s[30:31] 923; 924; R600-LABEL: v_fshr_v2i64: 925; R600: ; %bb.0: 926; R600-NEXT: CF_END 927; R600-NEXT: PAD 928 %ret = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %src0, <2 x i64> %src1, <2 x i64> %src2) 929 ret <2 x i64> %ret 930} 931 ; v_fshr_i24: i24 funnel shift right (a width that is not a power of two), result returned by value. The SI/VI/GFX9 checks expect v2 to be reduced as v2 - ((mul_hi(v2, 0xaaaaaaab) >> 4) * 24), which is presumably the shift amount modulo 24, then 8 is added to it and v1 is shifted left by 8 so that a single 32-bit v_alignbit_b32 produces the result. NOTE(review): v0/v1/v2 are presumably src0/src1/src2 - confirm against the calling convention. The R600 checks only match CF_END and PAD. 932define i24 @v_fshr_i24(i24 %src0, i24 %src1, i24 %src2) { 933; SI-LABEL: v_fshr_i24: 934; SI: ; %bb.0: 935; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 936; SI-NEXT: s_mov_b32 s4, 0xaaaaaaab 937; SI-NEXT: v_mul_hi_u32 v3, v2, s4 938; SI-NEXT: v_lshlrev_b32_e32 v1, 8, v1 939; SI-NEXT: v_lshrrev_b32_e32 v3, 4, v3 940; SI-NEXT: v_mul_lo_u32 v3, v3, 24 941; SI-NEXT: v_sub_i32_e32 v2, vcc, v2, v3 942; SI-NEXT: v_add_i32_e32 v2, vcc, 8, v2 943; SI-NEXT: v_alignbit_b32 v0, v0, v1, v2 944; SI-NEXT: s_setpc_b64 s[30:31] 945; 946; VI-LABEL: v_fshr_i24: 947; VI: ; %bb.0: 948; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 949; VI-NEXT: s_mov_b32 s4, 0xaaaaaaab 950; VI-NEXT: v_mul_hi_u32 v3, v2, s4 951; VI-NEXT: v_lshlrev_b32_e32 v1, 8, v1 952; VI-NEXT: v_lshrrev_b32_e32 v3, 4, v3 953; VI-NEXT: v_mul_lo_u32 v3, v3, 24 954; VI-NEXT: v_sub_u32_e32 v2, vcc, v2, v3 
955; VI-NEXT: v_add_u32_e32 v2, vcc, 8, v2 956; VI-NEXT: v_alignbit_b32 v0, v0, v1, v2 957; VI-NEXT: s_setpc_b64 s[30:31] 958; 959; GFX9-LABEL: v_fshr_i24: 960; GFX9: ; %bb.0: 961; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 962; GFX9-NEXT: s_mov_b32 s4, 0xaaaaaaab 963; GFX9-NEXT: v_mul_hi_u32 v3, v2, s4 964; GFX9-NEXT: v_lshlrev_b32_e32 v1, 8, v1 965; GFX9-NEXT: v_lshrrev_b32_e32 v3, 4, v3 966; GFX9-NEXT: v_mul_lo_u32 v3, v3, 24 967; GFX9-NEXT: v_sub_u32_e32 v2, v2, v3 968; GFX9-NEXT: v_add_u32_e32 v2, 8, v2 969; GFX9-NEXT: v_alignbit_b32 v0, v0, v1, v2 970; GFX9-NEXT: s_setpc_b64 s[30:31] 971; 972; R600-LABEL: v_fshr_i24: 973; R600: ; %bb.0: 974; R600-NEXT: CF_END 975; R600-NEXT: PAD 976 %ret = call i24 @llvm.fshr.i24(i24 %src0, i24 %src1, i24 %src2) 977 ret i24 %ret 978} 979 ; v_fshr_v2i24: funnel shift right on a two-element i24 vector. In the SI/VI/GFX9 checks the operands are read with buffer_load_dword at s32-relative offsets and the result is written out piecewise with buffer_store_byte/buffer_store_short addressed through v0, so presumably the vector arguments are passed on the stack and the result is returned through a pointer in v0 - confirm against the calling convention. Each element uses the mul_hi by 0xaaaaaaab, shift right by 4, multiply by 24, subtract, add 8 and v_alignbit_b32 sequence. The R600 checks only match CF_END and PAD. 980define <2 x i24> @v_fshr_v2i24(<2 x i24> %src0, <2 x i24> %src1, <2 x i24> %src2) { 981; SI-LABEL: v_fshr_v2i24: 982; SI: ; %bb.0: 983; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 984; SI-NEXT: buffer_load_dword v1, off, s[0:3], s32 985; SI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 986; SI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 987; SI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12 988; SI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:4 989; SI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:8 990; SI-NEXT: s_mov_b32 s4, 0xaaaaaaab 991; SI-NEXT: v_add_i32_e32 v7, vcc, 3, v0 992; SI-NEXT: v_add_i32_e32 v8, vcc, 4, v0 993; SI-NEXT: v_add_i32_e32 v9, vcc, 5, v0 994; SI-NEXT: v_add_i32_e32 v10, vcc, 2, v0 995; SI-NEXT: s_waitcnt vmcnt(4) 996; SI-NEXT: v_mul_hi_u32 v11, v2, s4 997; SI-NEXT: s_waitcnt vmcnt(3) 998; SI-NEXT: v_mul_hi_u32 v12, v3, s4 999; SI-NEXT: s_waitcnt vmcnt(2) 1000; SI-NEXT: v_lshlrev_b32_e32 v4, 8, v4 1001; SI-NEXT: v_lshrrev_b32_e32 v11, 4, v11 1002; SI-NEXT: v_lshrrev_b32_e32 v12, 4, v12 1003; SI-NEXT: v_mul_lo_u32 v11, v11, 24 1004; SI-NEXT: v_mul_lo_u32 v12, v12, 24 1005; SI-NEXT: s_waitcnt vmcnt(0) 1006; 
SI-NEXT: v_lshlrev_b32_e32 v6, 8, v6 1007; SI-NEXT: v_sub_i32_e32 v2, vcc, v2, v11 1008; SI-NEXT: v_sub_i32_e32 v3, vcc, v3, v12 1009; SI-NEXT: v_add_i32_e32 v2, vcc, 8, v2 1010; SI-NEXT: v_add_i32_e32 v3, vcc, 8, v3 1011; SI-NEXT: v_alignbit_b32 v1, v1, v6, v2 1012; SI-NEXT: v_alignbit_b32 v2, v5, v4, v3 1013; SI-NEXT: buffer_store_byte v2, v7, s[0:3], 0 offen 1014; SI-NEXT: buffer_store_short v1, v0, s[0:3], 0 offen 1015; SI-NEXT: v_lshrrev_b32_e32 v0, 8, v2 1016; SI-NEXT: s_waitcnt expcnt(1) 1017; SI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 1018; SI-NEXT: s_waitcnt expcnt(0) 1019; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 1020; SI-NEXT: buffer_store_byte v0, v8, s[0:3], 0 offen 1021; SI-NEXT: buffer_store_byte v2, v9, s[0:3], 0 offen 1022; SI-NEXT: buffer_store_byte v1, v10, s[0:3], 0 offen 1023; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1024; SI-NEXT: s_setpc_b64 s[30:31] 1025; 1026; VI-LABEL: v_fshr_v2i24: 1027; VI: ; %bb.0: 1028; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1029; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 1030; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 1031; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 1032; VI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12 1033; VI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:4 1034; VI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:8 1035; VI-NEXT: s_mov_b32 s4, 0xaaaaaaab 1036; VI-NEXT: v_add_u32_e32 v7, vcc, 3, v0 1037; VI-NEXT: v_add_u32_e32 v8, vcc, 4, v0 1038; VI-NEXT: v_add_u32_e32 v9, vcc, 5, v0 1039; VI-NEXT: v_add_u32_e32 v10, vcc, 2, v0 1040; VI-NEXT: s_waitcnt vmcnt(4) 1041; VI-NEXT: v_mul_hi_u32 v11, v2, s4 1042; VI-NEXT: s_waitcnt vmcnt(3) 1043; VI-NEXT: v_mul_hi_u32 v12, v3, s4 1044; VI-NEXT: s_waitcnt vmcnt(2) 1045; VI-NEXT: v_lshlrev_b32_e32 v4, 8, v4 1046; VI-NEXT: v_lshrrev_b32_e32 v11, 4, v11 1047; VI-NEXT: v_lshrrev_b32_e32 v12, 4, v12 1048; VI-NEXT: v_mul_lo_u32 v11, v11, 24 1049; VI-NEXT: v_mul_lo_u32 v12, v12, 24 1050; VI-NEXT: s_waitcnt 
vmcnt(0) 1051; VI-NEXT: v_lshlrev_b32_e32 v6, 8, v6 1052; VI-NEXT: v_sub_u32_e32 v2, vcc, v2, v11 1053; VI-NEXT: v_sub_u32_e32 v3, vcc, v3, v12 1054; VI-NEXT: v_add_u32_e32 v2, vcc, 8, v2 1055; VI-NEXT: v_add_u32_e32 v3, vcc, 8, v3 1056; VI-NEXT: v_alignbit_b32 v1, v1, v6, v2 1057; VI-NEXT: v_alignbit_b32 v2, v5, v4, v3 1058; VI-NEXT: buffer_store_byte v2, v7, s[0:3], 0 offen 1059; VI-NEXT: buffer_store_short v1, v0, s[0:3], 0 offen 1060; VI-NEXT: v_lshrrev_b32_e32 v0, 8, v2 1061; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 1062; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 1063; VI-NEXT: buffer_store_byte v0, v8, s[0:3], 0 offen 1064; VI-NEXT: buffer_store_byte v2, v9, s[0:3], 0 offen 1065; VI-NEXT: buffer_store_byte v1, v10, s[0:3], 0 offen 1066; VI-NEXT: s_waitcnt vmcnt(0) 1067; VI-NEXT: s_setpc_b64 s[30:31] 1068; 1069; GFX9-LABEL: v_fshr_v2i24: 1070; GFX9: ; %bb.0: 1071; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1072; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 1073; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:20 1074; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:12 1075; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:4 1076; GFX9-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:8 1077; GFX9-NEXT: buffer_load_dword v8, off, s[0:3], s32 1078; GFX9-NEXT: s_mov_b32 s4, 0xaaaaaaab 1079; GFX9-NEXT: s_waitcnt vmcnt(5) 1080; GFX9-NEXT: v_mul_hi_u32 v6, v1, s4 1081; GFX9-NEXT: s_waitcnt vmcnt(4) 1082; GFX9-NEXT: v_mul_hi_u32 v7, v2, s4 1083; GFX9-NEXT: s_waitcnt vmcnt(3) 1084; GFX9-NEXT: v_lshlrev_b32_e32 v3, 8, v3 1085; GFX9-NEXT: v_lshrrev_b32_e32 v6, 4, v6 1086; GFX9-NEXT: v_lshrrev_b32_e32 v7, 4, v7 1087; GFX9-NEXT: v_mul_lo_u32 v7, v7, 24 1088; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 1089; GFX9-NEXT: s_waitcnt vmcnt(1) 1090; GFX9-NEXT: v_lshlrev_b32_e32 v5, 8, v5 1091; GFX9-NEXT: v_sub_u32_e32 v2, v2, v7 1092; GFX9-NEXT: v_sub_u32_e32 v1, v1, v6 1093; GFX9-NEXT: v_add_u32_e32 v2, 8, v2 1094; GFX9-NEXT: v_add_u32_e32 v1, 
8, v1 1095; GFX9-NEXT: v_alignbit_b32 v2, v4, v3, v2 1096; GFX9-NEXT: s_waitcnt vmcnt(0) 1097; GFX9-NEXT: v_alignbit_b32 v1, v8, v5, v1 1098; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v2 1099; GFX9-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:3 1100; GFX9-NEXT: buffer_store_byte_d16_hi v1, v0, s[0:3], 0 offen offset:2 1101; GFX9-NEXT: buffer_store_short v1, v0, s[0:3], 0 offen 1102; GFX9-NEXT: buffer_store_byte_d16_hi v2, v0, s[0:3], 0 offen offset:5 1103; GFX9-NEXT: buffer_store_byte v3, v0, s[0:3], 0 offen offset:4 1104; GFX9-NEXT: s_waitcnt vmcnt(0) 1105; GFX9-NEXT: s_setpc_b64 s[30:31] 1106; 1107; R600-LABEL: v_fshr_v2i24: 1108; R600: ; %bb.0: 1109; R600-NEXT: CF_END 1110; R600-NEXT: PAD 1111 %ret = call <2 x i24> @llvm.fshr.v2i24(<2 x i24> %src0, <2 x i24> %src1, <2 x i24> %src2) 1112 ret <2 x i24> %ret 1113} 1114