1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,GFX89,SI 3; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,GFX89,VI 4; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,GFX89,GFX9 5; RUN: llc < %s -march=r600 -mcpu=redwood -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,R600 6 7declare i32 @llvm.fshr.i32(i32, i32, i32) 8declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) 9declare <3 x i32> @llvm.fshr.v3i32(<3 x i32>, <3 x i32>, <3 x i32>) 10declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) 11declare i16 @llvm.fshr.i16(i16, i16, i16) 12declare <2 x i16> @llvm.fshr.v2i16(<2 x i16>, <2 x i16>, <2 x i16>) 13declare <3 x i16> @llvm.fshr.v3i16(<3 x i16>, <3 x i16>, <3 x i16>) 14declare <4 x i16> @llvm.fshr.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) 15declare i64 @llvm.fshr.i64(i64, i64, i64) 16declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) 17declare i24 @llvm.fshr.i24(i24, i24, i24) 18declare <2 x i24> @llvm.fshr.v2i24(<2 x i24>, <2 x i24>, <2 x i24>) 19 20define amdgpu_kernel void @fshr_i32(i32 addrspace(1)* %in, i32 %x, i32 %y, i32 %z) { 21; SI-LABEL: fshr_i32: 22; SI: ; %bb.0: ; %entry 23; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 24; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xb 25; SI-NEXT: s_mov_b32 s7, 0xf000 26; SI-NEXT: s_mov_b32 s6, -1 27; SI-NEXT: s_waitcnt lgkmcnt(0) 28; SI-NEXT: v_mov_b32_e32 v0, s1 29; SI-NEXT: v_mov_b32_e32 v1, s2 30; SI-NEXT: v_alignbit_b32 v0, s0, v0, v1 31; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 32; SI-NEXT: s_endpgm 33; 34; VI-LABEL: fshr_i32: 35; VI: ; %bb.0: ; %entry 36; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 37; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2c 38; VI-NEXT: s_waitcnt lgkmcnt(0) 39; VI-NEXT: v_mov_b32_e32 v0, s1 40; 
VI-NEXT: v_mov_b32_e32 v1, s2 41; VI-NEXT: v_alignbit_b32 v2, s0, v0, v1 42; VI-NEXT: v_mov_b32_e32 v0, s4 43; VI-NEXT: v_mov_b32_e32 v1, s5 44; VI-NEXT: flat_store_dword v[0:1], v2 45; VI-NEXT: s_endpgm 46; 47; GFX9-LABEL: fshr_i32: 48; GFX9: ; %bb.0: ; %entry 49; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 50; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2c 51; GFX9-NEXT: s_waitcnt lgkmcnt(0) 52; GFX9-NEXT: v_mov_b32_e32 v0, s1 53; GFX9-NEXT: v_mov_b32_e32 v1, s2 54; GFX9-NEXT: v_alignbit_b32 v2, s0, v0, v1 55; GFX9-NEXT: v_mov_b32_e32 v0, s4 56; GFX9-NEXT: v_mov_b32_e32 v1, s5 57; GFX9-NEXT: global_store_dword v[0:1], v2, off 58; GFX9-NEXT: s_endpgm 59; 60; R600-LABEL: fshr_i32: 61; R600: ; %bb.0: ; %entry 62; R600-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[] 63; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1 64; R600-NEXT: CF_END 65; R600-NEXT: PAD 66; R600-NEXT: ALU clause starting at 4: 67; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, 68; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) 69; R600-NEXT: BIT_ALIGN_INT * T1.X, KC0[2].Z, KC0[2].W, KC0[3].X, 70entry: 71 %0 = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z) 72 store i32 %0, i32 addrspace(1)* %in 73 ret void 74} 75 76define amdgpu_kernel void @fshr_i32_imm(i32 addrspace(1)* %in, i32 %x, i32 %y) { 77; SI-LABEL: fshr_i32_imm: 78; SI: ; %bb.0: ; %entry 79; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 80; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 81; SI-NEXT: s_mov_b32 s7, 0xf000 82; SI-NEXT: s_mov_b32 s6, -1 83; SI-NEXT: s_waitcnt lgkmcnt(0) 84; SI-NEXT: v_mov_b32_e32 v0, s1 85; SI-NEXT: v_alignbit_b32 v0, s0, v0, 7 86; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 87; SI-NEXT: s_endpgm 88; 89; VI-LABEL: fshr_i32_imm: 90; VI: ; %bb.0: ; %entry 91; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 92; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c 93; VI-NEXT: s_waitcnt lgkmcnt(0) 94; VI-NEXT: v_mov_b32_e32 v0, s1 95; VI-NEXT: v_alignbit_b32 v2, s0, v0, 7 96; VI-NEXT: v_mov_b32_e32 v0, s2 97; VI-NEXT: 
v_mov_b32_e32 v1, s3 98; VI-NEXT: flat_store_dword v[0:1], v2 99; VI-NEXT: s_endpgm 100; 101; GFX9-LABEL: fshr_i32_imm: 102; GFX9: ; %bb.0: ; %entry 103; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 104; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c 105; GFX9-NEXT: s_waitcnt lgkmcnt(0) 106; GFX9-NEXT: v_mov_b32_e32 v0, s1 107; GFX9-NEXT: v_alignbit_b32 v2, s0, v0, 7 108; GFX9-NEXT: v_mov_b32_e32 v0, s2 109; GFX9-NEXT: v_mov_b32_e32 v1, s3 110; GFX9-NEXT: global_store_dword v[0:1], v2, off 111; GFX9-NEXT: s_endpgm 112; 113; R600-LABEL: fshr_i32_imm: 114; R600: ; %bb.0: ; %entry 115; R600-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[] 116; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1 117; R600-NEXT: CF_END 118; R600-NEXT: PAD 119; R600-NEXT: ALU clause starting at 4: 120; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, 121; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) 122; R600-NEXT: BIT_ALIGN_INT * T1.X, KC0[2].Z, KC0[2].W, literal.x, 123; R600-NEXT: 7(9.809089e-45), 0(0.000000e+00) 124entry: 125 %0 = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 7) 126 store i32 %0, i32 addrspace(1)* %in 127 ret void 128} 129 130define amdgpu_kernel void @fshr_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) { 131; SI-LABEL: fshr_v2i32: 132; SI: ; %bb.0: ; %entry 133; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 134; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb 135; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd 136; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xf 137; SI-NEXT: s_mov_b32 s7, 0xf000 138; SI-NEXT: s_mov_b32 s6, -1 139; SI-NEXT: s_waitcnt lgkmcnt(0) 140; SI-NEXT: v_mov_b32_e32 v0, s9 141; SI-NEXT: s_and_b32 s1, s1, 31 142; SI-NEXT: v_mov_b32_e32 v1, s1 143; SI-NEXT: s_cmp_eq_u32 s1, 0 144; SI-NEXT: s_cselect_b64 vcc, -1, 0 145; SI-NEXT: s_and_b32 s0, s0, 31 146; SI-NEXT: v_alignbit_b32 v1, s3, v0, v1 147; SI-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc 148; SI-NEXT: s_cmp_eq_u32 s0, 0 149; SI-NEXT: v_mov_b32_e32 v0, s8 150; SI-NEXT: v_mov_b32_e32 v2, s0 151; 
SI-NEXT: v_alignbit_b32 v2, s2, v0, v2 152; SI-NEXT: s_cselect_b64 vcc, -1, 0 153; SI-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 154; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 155; SI-NEXT: s_endpgm 156; 157; VI-LABEL: fshr_v2i32: 158; VI: ; %bb.0: ; %entry 159; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 160; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c 161; VI-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x34 162; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x3c 163; VI-NEXT: s_waitcnt lgkmcnt(0) 164; VI-NEXT: v_mov_b32_e32 v0, s7 165; VI-NEXT: s_and_b32 s1, s1, 31 166; VI-NEXT: v_mov_b32_e32 v1, s1 167; VI-NEXT: s_cmp_eq_u32 s1, 0 168; VI-NEXT: s_cselect_b64 vcc, -1, 0 169; VI-NEXT: s_and_b32 s0, s0, 31 170; VI-NEXT: v_alignbit_b32 v1, s5, v0, v1 171; VI-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc 172; VI-NEXT: s_cmp_eq_u32 s0, 0 173; VI-NEXT: v_mov_b32_e32 v0, s6 174; VI-NEXT: v_mov_b32_e32 v2, s0 175; VI-NEXT: v_alignbit_b32 v2, s4, v0, v2 176; VI-NEXT: s_cselect_b64 vcc, -1, 0 177; VI-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 178; VI-NEXT: v_mov_b32_e32 v2, s2 179; VI-NEXT: v_mov_b32_e32 v3, s3 180; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 181; VI-NEXT: s_endpgm 182; 183; GFX9-LABEL: fshr_v2i32: 184; GFX9: ; %bb.0: ; %entry 185; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 186; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c 187; GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x34 188; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x3c 189; GFX9-NEXT: s_waitcnt lgkmcnt(0) 190; GFX9-NEXT: v_mov_b32_e32 v0, s7 191; GFX9-NEXT: s_and_b32 s1, s1, 31 192; GFX9-NEXT: v_mov_b32_e32 v1, s1 193; GFX9-NEXT: s_cmp_eq_u32 s1, 0 194; GFX9-NEXT: s_cselect_b64 vcc, -1, 0 195; GFX9-NEXT: s_and_b32 s0, s0, 31 196; GFX9-NEXT: v_alignbit_b32 v1, s5, v0, v1 197; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc 198; GFX9-NEXT: s_cmp_eq_u32 s0, 0 199; GFX9-NEXT: v_mov_b32_e32 v0, s6 200; GFX9-NEXT: v_mov_b32_e32 v2, s0 201; GFX9-NEXT: v_alignbit_b32 v2, s4, v0, v2 202; GFX9-NEXT: s_cselect_b64 vcc, 
-1, 0 203; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 204; GFX9-NEXT: v_mov_b32_e32 v2, s2 205; GFX9-NEXT: v_mov_b32_e32 v3, s3 206; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off 207; GFX9-NEXT: s_endpgm 208; 209; R600-LABEL: fshr_v2i32: 210; R600: ; %bb.0: ; %entry 211; R600-NEXT: ALU 11, @4, KC0[CB0:0-32], KC1[] 212; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 213; R600-NEXT: CF_END 214; R600-NEXT: PAD 215; R600-NEXT: ALU clause starting at 4: 216; R600-NEXT: AND_INT * T0.W, KC0[4].X, literal.x, 217; R600-NEXT: 31(4.344025e-44), 0(0.000000e+00) 218; R600-NEXT: BIT_ALIGN_INT T1.W, KC0[3].X, KC0[3].Z, PV.W, 219; R600-NEXT: SETE_INT * T0.W, PV.W, 0.0, 220; R600-NEXT: CNDE_INT T0.Y, PS, PV.W, KC0[3].Z, 221; R600-NEXT: AND_INT * T0.W, KC0[3].W, literal.x, 222; R600-NEXT: 31(4.344025e-44), 0(0.000000e+00) 223; R600-NEXT: BIT_ALIGN_INT T1.W, KC0[2].W, KC0[3].Y, PV.W, 224; R600-NEXT: SETE_INT * T0.W, PV.W, 0.0, 225; R600-NEXT: CNDE_INT T0.X, PS, PV.W, KC0[3].Y, 226; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 227; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) 228entry: 229 %0 = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %z) 230 store <2 x i32> %0, <2 x i32> addrspace(1)* %in 231 ret void 232} 233 234define amdgpu_kernel void @fshr_v2i32_imm(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y) { 235; SI-LABEL: fshr_v2i32_imm: 236; SI: ; %bb.0: ; %entry 237; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 238; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb 239; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd 240; SI-NEXT: s_mov_b32 s7, 0xf000 241; SI-NEXT: s_mov_b32 s6, -1 242; SI-NEXT: s_waitcnt lgkmcnt(0) 243; SI-NEXT: v_mov_b32_e32 v0, s1 244; SI-NEXT: v_alignbit_b32 v1, s3, v0, 9 245; SI-NEXT: v_mov_b32_e32 v0, s0 246; SI-NEXT: v_alignbit_b32 v0, s2, v0, 7 247; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 248; SI-NEXT: s_endpgm 249; 250; VI-LABEL: fshr_v2i32_imm: 251; VI: ; %bb.0: ; %entry 252; VI-NEXT: s_load_dwordx2 
s[2:3], s[0:1], 0x24 253; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c 254; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 255; VI-NEXT: s_waitcnt lgkmcnt(0) 256; VI-NEXT: v_mov_b32_e32 v0, s1 257; VI-NEXT: v_mov_b32_e32 v2, s0 258; VI-NEXT: v_alignbit_b32 v1, s5, v0, 9 259; VI-NEXT: v_alignbit_b32 v0, s4, v2, 7 260; VI-NEXT: v_mov_b32_e32 v2, s2 261; VI-NEXT: v_mov_b32_e32 v3, s3 262; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 263; VI-NEXT: s_endpgm 264; 265; GFX9-LABEL: fshr_v2i32_imm: 266; GFX9: ; %bb.0: ; %entry 267; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 268; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c 269; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 270; GFX9-NEXT: s_waitcnt lgkmcnt(0) 271; GFX9-NEXT: v_mov_b32_e32 v0, s1 272; GFX9-NEXT: v_mov_b32_e32 v2, s0 273; GFX9-NEXT: v_alignbit_b32 v1, s5, v0, 9 274; GFX9-NEXT: v_alignbit_b32 v0, s4, v2, 7 275; GFX9-NEXT: v_mov_b32_e32 v2, s2 276; GFX9-NEXT: v_mov_b32_e32 v3, s3 277; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off 278; GFX9-NEXT: s_endpgm 279; 280; R600-LABEL: fshr_v2i32_imm: 281; R600: ; %bb.0: ; %entry 282; R600-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[] 283; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 284; R600-NEXT: CF_END 285; R600-NEXT: PAD 286; R600-NEXT: ALU clause starting at 4: 287; R600-NEXT: BIT_ALIGN_INT * T0.Y, KC0[3].X, KC0[3].Z, literal.x, 288; R600-NEXT: 9(1.261169e-44), 0(0.000000e+00) 289; R600-NEXT: BIT_ALIGN_INT * T0.X, KC0[2].W, KC0[3].Y, literal.x, 290; R600-NEXT: 7(9.809089e-45), 0(0.000000e+00) 291; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 292; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) 293entry: 294 %0 = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 7, i32 9>) 295 store <2 x i32> %0, <2 x i32> addrspace(1)* %in 296 ret void 297} 298 299define amdgpu_kernel void @fshr_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { 300; SI-LABEL: fshr_v4i32: 301; SI: ; %bb.0: ; %entry 302; SI-NEXT: 
s_load_dwordx2 s[4:5], s[0:1], 0x9 303; SI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0xd 304; SI-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x11 305; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x15 306; SI-NEXT: s_mov_b32 s7, 0xf000 307; SI-NEXT: s_mov_b32 s6, -1 308; SI-NEXT: s_waitcnt lgkmcnt(0) 309; SI-NEXT: v_mov_b32_e32 v0, s15 310; SI-NEXT: s_and_b32 s3, s3, 31 311; SI-NEXT: v_mov_b32_e32 v1, s3 312; SI-NEXT: s_cmp_eq_u32 s3, 0 313; SI-NEXT: s_cselect_b64 vcc, -1, 0 314; SI-NEXT: s_and_b32 s2, s2, 31 315; SI-NEXT: v_alignbit_b32 v1, s11, v0, v1 316; SI-NEXT: v_cndmask_b32_e32 v3, v1, v0, vcc 317; SI-NEXT: s_cmp_eq_u32 s2, 0 318; SI-NEXT: v_mov_b32_e32 v0, s14 319; SI-NEXT: v_mov_b32_e32 v1, s2 320; SI-NEXT: s_cselect_b64 vcc, -1, 0 321; SI-NEXT: s_and_b32 s1, s1, 31 322; SI-NEXT: v_alignbit_b32 v1, s10, v0, v1 323; SI-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc 324; SI-NEXT: s_cmp_eq_u32 s1, 0 325; SI-NEXT: v_mov_b32_e32 v0, s13 326; SI-NEXT: v_mov_b32_e32 v1, s1 327; SI-NEXT: s_cselect_b64 vcc, -1, 0 328; SI-NEXT: s_and_b32 s0, s0, 31 329; SI-NEXT: v_alignbit_b32 v1, s9, v0, v1 330; SI-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc 331; SI-NEXT: s_cmp_eq_u32 s0, 0 332; SI-NEXT: v_mov_b32_e32 v0, s12 333; SI-NEXT: v_mov_b32_e32 v4, s0 334; SI-NEXT: v_alignbit_b32 v4, s8, v0, v4 335; SI-NEXT: s_cselect_b64 vcc, -1, 0 336; SI-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc 337; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 338; SI-NEXT: s_endpgm 339; 340; VI-LABEL: fshr_v4i32: 341; VI: ; %bb.0: ; %entry 342; VI-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x24 343; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 344; VI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x44 345; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x54 346; VI-NEXT: s_waitcnt lgkmcnt(0) 347; VI-NEXT: v_mov_b32_e32 v0, s11 348; VI-NEXT: s_and_b32 s3, s3, 31 349; VI-NEXT: v_mov_b32_e32 v1, s3 350; VI-NEXT: s_cmp_eq_u32 s3, 0 351; VI-NEXT: s_cselect_b64 vcc, -1, 0 352; VI-NEXT: s_and_b32 s2, s2, 31 353; VI-NEXT: v_alignbit_b32 v1, s7, 
v0, v1 354; VI-NEXT: v_cndmask_b32_e32 v3, v1, v0, vcc 355; VI-NEXT: s_cmp_eq_u32 s2, 0 356; VI-NEXT: v_mov_b32_e32 v0, s10 357; VI-NEXT: v_mov_b32_e32 v1, s2 358; VI-NEXT: s_cselect_b64 vcc, -1, 0 359; VI-NEXT: s_and_b32 s1, s1, 31 360; VI-NEXT: v_alignbit_b32 v1, s6, v0, v1 361; VI-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc 362; VI-NEXT: s_cmp_eq_u32 s1, 0 363; VI-NEXT: v_mov_b32_e32 v0, s9 364; VI-NEXT: v_mov_b32_e32 v1, s1 365; VI-NEXT: s_cselect_b64 vcc, -1, 0 366; VI-NEXT: s_and_b32 s0, s0, 31 367; VI-NEXT: v_alignbit_b32 v1, s5, v0, v1 368; VI-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc 369; VI-NEXT: s_cmp_eq_u32 s0, 0 370; VI-NEXT: v_mov_b32_e32 v0, s8 371; VI-NEXT: v_mov_b32_e32 v4, s0 372; VI-NEXT: v_alignbit_b32 v4, s4, v0, v4 373; VI-NEXT: s_cselect_b64 vcc, -1, 0 374; VI-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc 375; VI-NEXT: v_mov_b32_e32 v4, s12 376; VI-NEXT: v_mov_b32_e32 v5, s13 377; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 378; VI-NEXT: s_endpgm 379; 380; GFX9-LABEL: fshr_v4i32: 381; GFX9: ; %bb.0: ; %entry 382; GFX9-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x24 383; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 384; GFX9-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x44 385; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x54 386; GFX9-NEXT: s_waitcnt lgkmcnt(0) 387; GFX9-NEXT: v_mov_b32_e32 v0, s11 388; GFX9-NEXT: s_and_b32 s3, s3, 31 389; GFX9-NEXT: v_mov_b32_e32 v1, s3 390; GFX9-NEXT: s_cmp_eq_u32 s3, 0 391; GFX9-NEXT: s_cselect_b64 vcc, -1, 0 392; GFX9-NEXT: s_and_b32 s2, s2, 31 393; GFX9-NEXT: v_alignbit_b32 v1, s7, v0, v1 394; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v0, vcc 395; GFX9-NEXT: s_cmp_eq_u32 s2, 0 396; GFX9-NEXT: v_mov_b32_e32 v0, s10 397; GFX9-NEXT: v_mov_b32_e32 v1, s2 398; GFX9-NEXT: s_cselect_b64 vcc, -1, 0 399; GFX9-NEXT: s_and_b32 s1, s1, 31 400; GFX9-NEXT: v_alignbit_b32 v1, s6, v0, v1 401; GFX9-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc 402; GFX9-NEXT: s_cmp_eq_u32 s1, 0 403; GFX9-NEXT: v_mov_b32_e32 v0, s9 404; GFX9-NEXT: v_mov_b32_e32 v1, s1 
405; GFX9-NEXT: s_cselect_b64 vcc, -1, 0 406; GFX9-NEXT: s_and_b32 s0, s0, 31 407; GFX9-NEXT: v_alignbit_b32 v1, s5, v0, v1 408; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc 409; GFX9-NEXT: s_cmp_eq_u32 s0, 0 410; GFX9-NEXT: v_mov_b32_e32 v0, s8 411; GFX9-NEXT: v_mov_b32_e32 v4, s0 412; GFX9-NEXT: v_alignbit_b32 v4, s4, v0, v4 413; GFX9-NEXT: s_cselect_b64 vcc, -1, 0 414; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc 415; GFX9-NEXT: v_mov_b32_e32 v4, s12 416; GFX9-NEXT: v_mov_b32_e32 v5, s13 417; GFX9-NEXT: global_store_dwordx4 v[4:5], v[0:3], off 418; GFX9-NEXT: s_endpgm 419; 420; R600-LABEL: fshr_v4i32: 421; R600: ; %bb.0: ; %entry 422; R600-NEXT: ALU 20, @4, KC0[CB0:0-32], KC1[] 423; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1 424; R600-NEXT: CF_END 425; R600-NEXT: PAD 426; R600-NEXT: ALU clause starting at 4: 427; R600-NEXT: AND_INT T0.W, KC0[5].Z, literal.x, 428; R600-NEXT: AND_INT * T1.W, KC0[6].X, literal.x, 429; R600-NEXT: 31(4.344025e-44), 0(0.000000e+00) 430; R600-NEXT: SETE_INT T0.Z, PS, 0.0, 431; R600-NEXT: BIT_ALIGN_INT * T1.W, KC0[4].X, KC0[5].X, PS, 432; R600-NEXT: AND_INT * T2.W, KC0[5].W, literal.x, 433; R600-NEXT: 31(4.344025e-44), 0(0.000000e+00) 434; R600-NEXT: SETE_INT T1.Z, PV.W, 0.0, 435; R600-NEXT: BIT_ALIGN_INT * T2.W, KC0[3].W, KC0[4].W, PV.W, 436; R600-NEXT: CNDE_INT * T1.W, T0.Z, T1.W, KC0[5].X, 437; R600-NEXT: CNDE_INT T1.Z, T1.Z, T2.W, KC0[4].W, 438; R600-NEXT: BIT_ALIGN_INT T2.W, KC0[3].Z, KC0[4].Z, T0.W, 439; R600-NEXT: SETE_INT * T0.W, T0.W, 0.0, 440; R600-NEXT: CNDE_INT T1.Y, PS, PV.W, KC0[4].Z, 441; R600-NEXT: AND_INT * T0.W, KC0[5].Y, literal.x, 442; R600-NEXT: 31(4.344025e-44), 0(0.000000e+00) 443; R600-NEXT: BIT_ALIGN_INT T2.W, KC0[3].Y, KC0[4].Y, PV.W, 444; R600-NEXT: SETE_INT * T0.W, PV.W, 0.0, 445; R600-NEXT: CNDE_INT T1.X, PS, PV.W, KC0[4].Y, 446; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, 447; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) 448entry: 449 %0 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> 
%y, <4 x i32> %z) 450 store <4 x i32> %0, <4 x i32> addrspace(1)* %in 451 ret void 452} 453 454define amdgpu_kernel void @fshr_v4i32_imm(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y) { 455; SI-LABEL: fshr_v4i32_imm: 456; SI: ; %bb.0: ; %entry 457; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 458; SI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0xd 459; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x11 460; SI-NEXT: s_mov_b32 s7, 0xf000 461; SI-NEXT: s_mov_b32 s6, -1 462; SI-NEXT: s_waitcnt lgkmcnt(0) 463; SI-NEXT: v_mov_b32_e32 v0, s3 464; SI-NEXT: v_alignbit_b32 v3, s11, v0, 1 465; SI-NEXT: v_mov_b32_e32 v0, s2 466; SI-NEXT: v_alignbit_b32 v2, s10, v0, 9 467; SI-NEXT: v_mov_b32_e32 v0, s1 468; SI-NEXT: v_alignbit_b32 v1, s9, v0, 7 469; SI-NEXT: v_mov_b32_e32 v0, s0 470; SI-NEXT: v_alignbit_b32 v0, s8, v0, 1 471; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 472; SI-NEXT: s_endpgm 473; 474; VI-LABEL: fshr_v4i32_imm: 475; VI: ; %bb.0: ; %entry 476; VI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24 477; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 478; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x44 479; VI-NEXT: s_waitcnt lgkmcnt(0) 480; VI-NEXT: v_mov_b32_e32 v4, s8 481; VI-NEXT: v_mov_b32_e32 v5, s9 482; VI-NEXT: v_mov_b32_e32 v0, s3 483; VI-NEXT: v_mov_b32_e32 v1, s2 484; VI-NEXT: v_alignbit_b32 v3, s7, v0, 1 485; VI-NEXT: v_mov_b32_e32 v0, s1 486; VI-NEXT: v_alignbit_b32 v2, s6, v1, 9 487; VI-NEXT: v_alignbit_b32 v1, s5, v0, 7 488; VI-NEXT: v_mov_b32_e32 v0, s0 489; VI-NEXT: v_alignbit_b32 v0, s4, v0, 1 490; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 491; VI-NEXT: s_endpgm 492; 493; GFX9-LABEL: fshr_v4i32_imm: 494; GFX9: ; %bb.0: ; %entry 495; GFX9-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24 496; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 497; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x44 498; GFX9-NEXT: s_waitcnt lgkmcnt(0) 499; GFX9-NEXT: v_mov_b32_e32 v4, s8 500; GFX9-NEXT: v_mov_b32_e32 v5, s9 501; GFX9-NEXT: v_mov_b32_e32 v0, s3 502; GFX9-NEXT: 
v_mov_b32_e32 v1, s2 503; GFX9-NEXT: v_alignbit_b32 v3, s7, v0, 1 504; GFX9-NEXT: v_mov_b32_e32 v0, s1 505; GFX9-NEXT: v_alignbit_b32 v2, s6, v1, 9 506; GFX9-NEXT: v_alignbit_b32 v1, s5, v0, 7 507; GFX9-NEXT: v_mov_b32_e32 v0, s0 508; GFX9-NEXT: v_alignbit_b32 v0, s4, v0, 1 509; GFX9-NEXT: global_store_dwordx4 v[4:5], v[0:3], off 510; GFX9-NEXT: s_endpgm 511; 512; R600-LABEL: fshr_v4i32_imm: 513; R600: ; %bb.0: ; %entry 514; R600-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] 515; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1 516; R600-NEXT: CF_END 517; R600-NEXT: PAD 518; R600-NEXT: ALU clause starting at 4: 519; R600-NEXT: BIT_ALIGN_INT * T0.W, KC0[4].X, KC0[5].X, 1, 520; R600-NEXT: BIT_ALIGN_INT * T0.Z, KC0[3].W, KC0[4].W, literal.x, 521; R600-NEXT: 9(1.261169e-44), 0(0.000000e+00) 522; R600-NEXT: BIT_ALIGN_INT * T0.Y, KC0[3].Z, KC0[4].Z, literal.x, 523; R600-NEXT: 7(9.809089e-45), 0(0.000000e+00) 524; R600-NEXT: BIT_ALIGN_INT * T0.X, KC0[3].Y, KC0[4].Y, 1, 525; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 526; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) 527entry: 528 %0 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 7, i32 9, i32 33>) 529 store <4 x i32> %0, <4 x i32> addrspace(1)* %in 530 ret void 531} 532 533define i32 @v_fshr_i32(i32 %src0, i32 %src1, i32 %src2) { 534; GFX89-LABEL: v_fshr_i32: 535; GFX89: ; %bb.0: 536; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 537; GFX89-NEXT: v_alignbit_b32 v0, v0, v1, v2 538; GFX89-NEXT: s_setpc_b64 s[30:31] 539; 540; R600-LABEL: v_fshr_i32: 541; R600: ; %bb.0: 542; R600-NEXT: CF_END 543; R600-NEXT: PAD 544 %ret = call i32 @llvm.fshr.i32(i32 %src0, i32 %src1, i32 %src2) 545 ret i32 %ret 546} 547 548define <2 x i32> @v_fshr_v2i32(<2 x i32> %src0, <2 x i32> %src1, <2 x i32> %src2) { 549; GFX89-LABEL: v_fshr_v2i32: 550; GFX89: ; %bb.0: 551; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 552; GFX89-NEXT: v_and_b32_e32 v4, 31, v4 553; GFX89-NEXT: v_alignbit_b32 v0, v0, v2, v4 
554; GFX89-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 555; GFX89-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 556; GFX89-NEXT: v_and_b32_e32 v2, 31, v5 557; GFX89-NEXT: v_alignbit_b32 v1, v1, v3, v2 558; GFX89-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 559; GFX89-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 560; GFX89-NEXT: s_setpc_b64 s[30:31] 561; 562; R600-LABEL: v_fshr_v2i32: 563; R600: ; %bb.0: 564; R600-NEXT: CF_END 565; R600-NEXT: PAD 566 %ret = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %src0, <2 x i32> %src1, <2 x i32> %src2) 567 ret <2 x i32> %ret 568} 569 570define <3 x i32> @v_fshr_v3i32(<3 x i32> %src0, <3 x i32> %src1, <3 x i32> %src2) { 571; GFX89-LABEL: v_fshr_v3i32: 572; GFX89: ; %bb.0: 573; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 574; GFX89-NEXT: v_and_b32_e32 v6, 31, v6 575; GFX89-NEXT: v_alignbit_b32 v0, v0, v3, v6 576; GFX89-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 577; GFX89-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 578; GFX89-NEXT: v_and_b32_e32 v3, 31, v7 579; GFX89-NEXT: v_alignbit_b32 v1, v1, v4, v3 580; GFX89-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 581; GFX89-NEXT: v_and_b32_e32 v3, 31, v8 582; GFX89-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 583; GFX89-NEXT: v_alignbit_b32 v2, v2, v5, v3 584; GFX89-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 585; GFX89-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc 586; GFX89-NEXT: s_setpc_b64 s[30:31] 587; 588; R600-LABEL: v_fshr_v3i32: 589; R600: ; %bb.0: 590; R600-NEXT: CF_END 591; R600-NEXT: PAD 592 %ret = call <3 x i32> @llvm.fshr.v3i32(<3 x i32> %src0, <3 x i32> %src1, <3 x i32> %src2) 593 ret <3 x i32> %ret 594} 595 596define <4 x i32> @v_fshr_v4i32(<4 x i32> %src0, <4 x i32> %src1, <4 x i32> %src2) { 597; GFX89-LABEL: v_fshr_v4i32: 598; GFX89: ; %bb.0: 599; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 600; GFX89-NEXT: v_and_b32_e32 v8, 31, v8 601; GFX89-NEXT: v_alignbit_b32 v0, v0, v4, v8 602; GFX89-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8 603; GFX89-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 604; GFX89-NEXT: v_and_b32_e32 v4, 31, v9 605; GFX89-NEXT: 
v_alignbit_b32 v1, v1, v5, v4 606; GFX89-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 607; GFX89-NEXT: v_and_b32_e32 v4, 31, v10 608; GFX89-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 609; GFX89-NEXT: v_alignbit_b32 v2, v2, v6, v4 610; GFX89-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 611; GFX89-NEXT: v_and_b32_e32 v4, 31, v11 612; GFX89-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc 613; GFX89-NEXT: v_alignbit_b32 v3, v3, v7, v4 614; GFX89-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 615; GFX89-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc 616; GFX89-NEXT: s_setpc_b64 s[30:31] 617; 618; R600-LABEL: v_fshr_v4i32: 619; R600: ; %bb.0: 620; R600-NEXT: CF_END 621; R600-NEXT: PAD 622 %ret = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %src0, <4 x i32> %src1, <4 x i32> %src2) 623 ret <4 x i32> %ret 624} 625 626define i16 @v_fshr_i16(i16 %src0, i16 %src1, i16 %src2) { 627; SI-LABEL: v_fshr_i16: 628; SI: ; %bb.0: 629; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 630; SI-NEXT: v_and_b32_e32 v2, 15, v2 631; SI-NEXT: v_and_b32_e32 v3, 0xffff, v1 632; SI-NEXT: v_sub_i32_e32 v4, vcc, 16, v2 633; SI-NEXT: v_lshr_b32_e32 v3, v3, v2 634; SI-NEXT: v_lshl_b32_e32 v0, v0, v4 635; SI-NEXT: v_or_b32_e32 v0, v0, v3 636; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 637; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 638; SI-NEXT: s_setpc_b64 s[30:31] 639; 640; VI-LABEL: v_fshr_i16: 641; VI: ; %bb.0: 642; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 643; VI-NEXT: v_and_b32_e32 v2, 15, v2 644; VI-NEXT: v_sub_u16_e32 v4, 16, v2 645; VI-NEXT: v_lshrrev_b16_e32 v3, v2, v1 646; VI-NEXT: v_lshlrev_b16_e32 v0, v4, v0 647; VI-NEXT: v_or_b32_e32 v0, v0, v3 648; VI-NEXT: v_cmp_eq_u16_e32 vcc, 0, v2 649; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 650; VI-NEXT: s_setpc_b64 s[30:31] 651; 652; GFX9-LABEL: v_fshr_i16: 653; GFX9: ; %bb.0: 654; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 655; GFX9-NEXT: v_and_b32_e32 v2, 15, v2 656; GFX9-NEXT: v_sub_u16_e32 v4, 16, v2 657; GFX9-NEXT: v_lshrrev_b16_e32 v3, v2, v1 658; GFX9-NEXT: v_lshlrev_b16_e32 v0, v4, 
v0 659; GFX9-NEXT: v_or_b32_e32 v0, v0, v3 660; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, 0, v2 661; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 662; GFX9-NEXT: s_setpc_b64 s[30:31] 663; 664; R600-LABEL: v_fshr_i16: 665; R600: ; %bb.0: 666; R600-NEXT: CF_END 667; R600-NEXT: PAD 668 %ret = call i16 @llvm.fshr.i16(i16 %src0, i16 %src1, i16 %src2) 669 ret i16 %ret 670} 671 672define <2 x i16> @v_fshr_v2i16(<2 x i16> %src0, <2 x i16> %src1, <2 x i16> %src2) { 673; SI-LABEL: v_fshr_v2i16: 674; SI: ; %bb.0: 675; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 676; SI-NEXT: s_mov_b32 s4, 0xffff 677; SI-NEXT: v_and_b32_e32 v5, 15, v5 678; SI-NEXT: v_and_b32_e32 v7, s4, v3 679; SI-NEXT: v_sub_i32_e32 v8, vcc, 16, v5 680; SI-NEXT: v_lshr_b32_e32 v7, v7, v5 681; SI-NEXT: v_lshl_b32_e32 v1, v1, v8 682; SI-NEXT: v_or_b32_e32 v1, v1, v7 683; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 684; SI-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 685; SI-NEXT: v_and_b32_e32 v3, 15, v4 686; SI-NEXT: v_sub_i32_e32 v5, vcc, 16, v3 687; SI-NEXT: v_and_b32_e32 v6, s4, v2 688; SI-NEXT: v_lshr_b32_e32 v4, v6, v3 689; SI-NEXT: v_lshl_b32_e32 v0, v0, v5 690; SI-NEXT: v_or_b32_e32 v0, v0, v4 691; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 692; SI-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 693; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 694; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0 695; SI-NEXT: v_or_b32_e32 v0, v0, v1 696; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 697; SI-NEXT: s_setpc_b64 s[30:31] 698; 699; VI-LABEL: v_fshr_v2i16: 700; VI: ; %bb.0: 701; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 702; VI-NEXT: v_and_b32_e32 v3, 0xf000f, v2 703; VI-NEXT: v_lshrrev_b32_e32 v5, 16, v1 704; VI-NEXT: v_bfe_u32 v2, v2, 16, 4 705; VI-NEXT: v_lshrrev_b16_e32 v4, v3, v1 706; VI-NEXT: v_lshrrev_b16_sdwa v6, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 707; VI-NEXT: v_or_b32_e32 v4, v4, v6 708; VI-NEXT: v_sub_u16_e32 v6, 16, v2 709; VI-NEXT: v_sub_u16_e32 v7, 16, v3 710; VI-NEXT: v_lshlrev_b16_sdwa v6, v6, v0 
dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 711; VI-NEXT: v_lshlrev_b16_e32 v0, v7, v0 712; VI-NEXT: v_or_b32_e32 v0, v0, v6 713; VI-NEXT: v_or_b32_e32 v0, v0, v4 714; VI-NEXT: v_cmp_eq_u16_e32 vcc, 0, v3 715; VI-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc 716; VI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 717; VI-NEXT: v_cmp_eq_u16_e32 vcc, 0, v2 718; VI-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 719; VI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 720; VI-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 721; VI-NEXT: s_setpc_b64 s[30:31] 722; 723; GFX9-LABEL: v_fshr_v2i16: 724; GFX9: ; %bb.0: 725; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 726; GFX9-NEXT: v_and_b32_e32 v2, 0xf000f, v2 727; GFX9-NEXT: v_pk_sub_i16 v4, 16, v2 op_sel_hi:[0,1] 728; GFX9-NEXT: v_pk_lshlrev_b16 v0, v4, v0 729; GFX9-NEXT: v_pk_lshrrev_b16 v3, v2, v1 730; GFX9-NEXT: v_or_b32_e32 v0, v0, v3 731; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, 0, v2 732; GFX9-NEXT: v_mov_b32_e32 v4, 0 733; GFX9-NEXT: v_cndmask_b32_e32 v3, v0, v1, vcc 734; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 735; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1 736; GFX9-NEXT: v_cmp_eq_u16_sdwa s[4:5], v2, v4 src0_sel:WORD_1 src1_sel:DWORD 737; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] 738; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v3 739; GFX9-NEXT: v_lshl_or_b32 v0, v0, 16, v1 740; GFX9-NEXT: s_setpc_b64 s[30:31] 741; 742; R600-LABEL: v_fshr_v2i16: 743; R600: ; %bb.0: 744; R600-NEXT: CF_END 745; R600-NEXT: PAD 746 %ret = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %src0, <2 x i16> %src1, <2 x i16> %src2) 747 ret <2 x i16> %ret 748} 749 750define <3 x i16> @v_fshr_v3i16(<3 x i16> %src0, <3 x i16> %src1, <3 x i16> %src2) { 751; SI-LABEL: v_fshr_v3i16: 752; SI: ; %bb.0: 753; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 754; SI-NEXT: s_mov_b32 s4, 0xffff 755; SI-NEXT: v_and_b32_e32 v7, 15, v7 756; SI-NEXT: v_and_b32_e32 v12, s4, v4 757; SI-NEXT: v_sub_i32_e32 v13, vcc, 16, v7 758; 
SI-NEXT: v_lshr_b32_e32 v12, v12, v7 759; SI-NEXT: v_lshl_b32_e32 v1, v1, v13 760; SI-NEXT: v_or_b32_e32 v1, v1, v12 761; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 762; SI-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 763; SI-NEXT: v_and_b32_e32 v4, 15, v6 764; SI-NEXT: v_sub_i32_e32 v7, vcc, 16, v4 765; SI-NEXT: v_and_b32_e32 v11, s4, v3 766; SI-NEXT: v_lshr_b32_e32 v6, v11, v4 767; SI-NEXT: v_lshl_b32_e32 v0, v0, v7 768; SI-NEXT: v_or_b32_e32 v0, v0, v6 769; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 770; SI-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 771; SI-NEXT: v_and_b32_e32 v3, 15, v8 772; SI-NEXT: v_sub_i32_e32 v6, vcc, 16, v3 773; SI-NEXT: v_and_b32_e32 v10, s4, v5 774; SI-NEXT: v_lshr_b32_e32 v4, v10, v3 775; SI-NEXT: v_lshl_b32_e32 v2, v2, v6 776; SI-NEXT: v_mov_b32_e32 v9, 0xffff 777; SI-NEXT: v_or_b32_e32 v2, v2, v4 778; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 779; SI-NEXT: v_cndmask_b32_e32 v3, v2, v5, vcc 780; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 781; SI-NEXT: v_and_b32_e32 v0, v9, v0 782; SI-NEXT: v_or_b32_e32 v0, v0, v1 783; SI-NEXT: v_and_b32_e32 v2, v9, v3 784; SI-NEXT: v_alignbit_b32 v1, v3, v1, 16 785; SI-NEXT: s_setpc_b64 s[30:31] 786; 787; VI-LABEL: v_fshr_v3i16: 788; VI: ; %bb.0: 789; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 790; VI-NEXT: v_mov_b32_e32 v6, 15 791; VI-NEXT: v_and_b32_sdwa v6, v4, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 792; VI-NEXT: v_lshrrev_b32_e32 v7, 16, v2 793; VI-NEXT: v_lshrrev_b16_e32 v8, v6, v7 794; VI-NEXT: v_sub_u16_e32 v6, 16, v6 795; VI-NEXT: v_lshlrev_b16_sdwa v6, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 796; VI-NEXT: v_or_b32_e32 v6, v6, v8 797; VI-NEXT: v_bfe_u32 v8, v4, 16, 4 798; VI-NEXT: v_cmp_eq_u16_e32 vcc, 0, v8 799; VI-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc 800; VI-NEXT: v_and_b32_e32 v7, 15, v5 801; VI-NEXT: v_lshrrev_b16_e32 v8, v7, v3 802; VI-NEXT: v_sub_u16_e32 v7, 16, v7 803; VI-NEXT: v_lshlrev_b16_e32 v1, v7, v1 804; VI-NEXT: v_and_b32_e32 v5, 15, 
v5 805; VI-NEXT: v_or_b32_e32 v1, v1, v8 806; VI-NEXT: v_cmp_eq_u16_e32 vcc, 0, v5 807; VI-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 808; VI-NEXT: v_and_b32_e32 v3, 15, v4 809; VI-NEXT: v_lshrrev_b16_e32 v5, v3, v2 810; VI-NEXT: v_sub_u16_e32 v3, 16, v3 811; VI-NEXT: v_lshlrev_b16_e32 v0, v3, v0 812; VI-NEXT: v_and_b32_e32 v3, 0xf000f, v4 813; VI-NEXT: v_or_b32_e32 v0, v0, v5 814; VI-NEXT: v_cmp_eq_u16_e32 vcc, 0, v3 815; VI-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 816; VI-NEXT: v_lshlrev_b32_e32 v2, 16, v6 817; VI-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 818; VI-NEXT: s_setpc_b64 s[30:31] 819; 820; GFX9-LABEL: v_fshr_v3i16: 821; GFX9: ; %bb.0: 822; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 823; GFX9-NEXT: v_mov_b32_e32 v7, 15 824; GFX9-NEXT: v_and_b32_e32 v6, 15, v4 825; GFX9-NEXT: v_mov_b32_e32 v8, 0xffff 826; GFX9-NEXT: v_and_b32_sdwa v7, v4, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 827; GFX9-NEXT: v_and_b32_e32 v6, v8, v6 828; GFX9-NEXT: v_lshl_or_b32 v6, v7, 16, v6 829; GFX9-NEXT: v_pk_lshrrev_b16 v7, v6, v2 830; GFX9-NEXT: v_pk_sub_i16 v6, 16, v6 op_sel_hi:[0,1] 831; GFX9-NEXT: s_mov_b32 s6, 0xf000f 832; GFX9-NEXT: v_pk_lshlrev_b16 v0, v6, v0 833; GFX9-NEXT: v_and_b32_e32 v4, s6, v4 834; GFX9-NEXT: v_or_b32_e32 v0, v0, v7 835; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, 0, v4 836; GFX9-NEXT: v_mov_b32_e32 v7, 0 837; GFX9-NEXT: v_cndmask_b32_e32 v6, v0, v2, vcc 838; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 839; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2 840; GFX9-NEXT: v_cmp_eq_u16_sdwa s[4:5], v4, v7 src0_sel:WORD_1 src1_sel:DWORD 841; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[4:5] 842; GFX9-NEXT: v_and_b32_e32 v2, 15, v5 843; GFX9-NEXT: v_and_b32_e32 v2, v8, v2 844; GFX9-NEXT: v_pk_lshrrev_b16 v4, v2, v3 845; GFX9-NEXT: v_pk_sub_i16 v2, 16, v2 846; GFX9-NEXT: v_pk_lshlrev_b16 v1, v2, v1 847; GFX9-NEXT: v_and_b32_e32 v2, s6, v5 848; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, 0, v2 849; 
GFX9-NEXT: v_or_b32_e32 v1, v1, v4 850; GFX9-NEXT: v_and_b32_e32 v2, v8, v6 851; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 852; GFX9-NEXT: v_lshl_or_b32 v0, v0, 16, v2 853; GFX9-NEXT: s_setpc_b64 s[30:31] 854; 855; R600-LABEL: v_fshr_v3i16: 856; R600: ; %bb.0: 857; R600-NEXT: CF_END 858; R600-NEXT: PAD 859 %ret = call <3 x i16> @llvm.fshr.v3i16(<3 x i16> %src0, <3 x i16> %src1, <3 x i16> %src2) 860 ret <3 x i16> %ret 861} 862
; Exercises lowering of the <4 x i16> funnel-shift-right intrinsic. The generated checks above/below show scalarized 32-bit shift/or/cndmask expansion on older targets and packed 16-bit (v_pk_*) ops on newer ones. Do not hand-edit the generated assertions; regenerate with utils/update_llc_test_checks.py instead.
863define <4 x i16> @v_fshr_v4i16(<4 x i16> %src0, <4 x i16> %src1, <4 x i16> %src2) { 864; SI-LABEL: v_fshr_v4i16: 865; SI: ; %bb.0: 866; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 867; SI-NEXT: s_mov_b32 s4, 0xffff 868; SI-NEXT: v_and_b32_e32 v9, 15, v9 869; SI-NEXT: v_and_b32_e32 v16, s4, v5 870; SI-NEXT: v_sub_i32_e32 v17, vcc, 16, v9 871; SI-NEXT: v_lshr_b32_e32 v16, v16, v9 872; SI-NEXT: v_lshl_b32_e32 v1, v1, v17 873; SI-NEXT: v_or_b32_e32 v1, v1, v16 874; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9 875; SI-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 876; SI-NEXT: v_and_b32_e32 v5, 15, v8 877; SI-NEXT: v_sub_i32_e32 v9, vcc, 16, v5 878; SI-NEXT: v_and_b32_e32 v15, s4, v4 879; SI-NEXT: v_lshr_b32_e32 v8, v15, v5 880; SI-NEXT: v_lshl_b32_e32 v0, v0, v9 881; SI-NEXT: v_or_b32_e32 v0, v0, v8 882; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 883; SI-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 884; SI-NEXT: v_and_b32_e32 v4, 15, v11 885; SI-NEXT: v_sub_i32_e32 v8, vcc, 16, v4 886; SI-NEXT: v_and_b32_e32 v14, s4, v7 887; SI-NEXT: v_lshr_b32_e32 v5, v14, v4 888; SI-NEXT: v_lshl_b32_e32 v3, v3, v8 889; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 890; SI-NEXT: v_or_b32_e32 v3, v3, v5 891; SI-NEXT: v_and_b32_e32 v4, 15, v10 892; SI-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc 893; SI-NEXT: v_sub_i32_e32 v7, vcc, 16, v4 894; SI-NEXT: v_and_b32_e32 v13, s4, v6 895; SI-NEXT: v_lshr_b32_e32 v5, v13, v4 896; SI-NEXT: v_lshl_b32_e32 v2, v2, v7 897; SI-NEXT: v_or_b32_e32 v2, v2, v5 898; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 899; SI-NEXT: v_mov_b32_e32 v12, 0xffff 900; SI-NEXT:
v_cndmask_b32_e32 v2, v2, v6, vcc 901; SI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 902; SI-NEXT: v_and_b32_e32 v2, v12, v2 903; SI-NEXT: v_or_b32_e32 v2, v2, v3 904; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 905; SI-NEXT: v_and_b32_e32 v0, v12, v0 906; SI-NEXT: v_or_b32_e32 v0, v0, v1 907; SI-NEXT: v_alignbit_b32 v1, v2, v1, 16 908; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v2 909; SI-NEXT: s_setpc_b64 s[30:31] 910; 911; VI-LABEL: v_fshr_v4i16: 912; VI: ; %bb.0: 913; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 914; VI-NEXT: v_mov_b32_e32 v6, 15 915; VI-NEXT: v_and_b32_sdwa v7, v5, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 916; VI-NEXT: v_lshrrev_b32_e32 v8, 16, v3 917; VI-NEXT: v_lshrrev_b16_e32 v9, v7, v8 918; VI-NEXT: v_sub_u16_e32 v7, 16, v7 919; VI-NEXT: v_lshlrev_b16_sdwa v7, v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 920; VI-NEXT: v_or_b32_e32 v7, v7, v9 921; VI-NEXT: v_bfe_u32 v9, v5, 16, 4 922; VI-NEXT: v_cmp_eq_u16_e32 vcc, 0, v9 923; VI-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc 924; VI-NEXT: v_and_b32_sdwa v6, v4, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 925; VI-NEXT: v_lshrrev_b32_e32 v8, 16, v2 926; VI-NEXT: v_lshrrev_b16_e32 v9, v6, v8 927; VI-NEXT: v_sub_u16_e32 v6, 16, v6 928; VI-NEXT: v_lshlrev_b16_sdwa v6, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 929; VI-NEXT: v_or_b32_e32 v6, v6, v9 930; VI-NEXT: v_bfe_u32 v9, v4, 16, 4 931; VI-NEXT: v_cmp_eq_u16_e32 vcc, 0, v9 932; VI-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc 933; VI-NEXT: v_and_b32_e32 v8, 15, v5 934; VI-NEXT: v_lshrrev_b16_e32 v9, v8, v3 935; VI-NEXT: v_sub_u16_e32 v8, 16, v8 936; VI-NEXT: s_mov_b32 s4, 0xf000f 937; VI-NEXT: v_lshlrev_b16_e32 v1, v8, v1 938; VI-NEXT: v_and_b32_e32 v5, s4, v5 939; VI-NEXT: v_or_b32_e32 v1, v1, v9 940; VI-NEXT: v_cmp_eq_u16_e32 vcc, 0, v5 941; VI-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 942; VI-NEXT: v_and_b32_e32 v3, 15, v4 943; VI-NEXT: v_lshrrev_b16_e32 v5, 
v3, v2 944; VI-NEXT: v_sub_u16_e32 v3, 16, v3 945; VI-NEXT: v_lshlrev_b16_e32 v0, v3, v0 946; VI-NEXT: v_and_b32_e32 v3, s4, v4 947; VI-NEXT: v_or_b32_e32 v0, v0, v5 948; VI-NEXT: v_cmp_eq_u16_e32 vcc, 0, v3 949; VI-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 950; VI-NEXT: v_lshlrev_b32_e32 v2, 16, v6 951; VI-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 952; VI-NEXT: v_lshlrev_b32_e32 v2, 16, v7 953; VI-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 954; VI-NEXT: s_setpc_b64 s[30:31] 955; 956; GFX9-LABEL: v_fshr_v4i16: 957; GFX9: ; %bb.0: 958; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 959; GFX9-NEXT: v_mov_b32_e32 v7, 15 960; GFX9-NEXT: v_and_b32_e32 v6, 15, v5 961; GFX9-NEXT: v_mov_b32_e32 v9, 0xffff 962; GFX9-NEXT: v_and_b32_sdwa v8, v5, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 963; GFX9-NEXT: v_and_b32_e32 v6, v9, v6 964; GFX9-NEXT: v_lshl_or_b32 v6, v8, 16, v6 965; GFX9-NEXT: v_pk_lshrrev_b16 v8, v6, v3 966; GFX9-NEXT: v_pk_sub_i16 v6, 16, v6 op_sel_hi:[0,1] 967; GFX9-NEXT: s_mov_b32 s6, 0xf000f 968; GFX9-NEXT: v_pk_lshlrev_b16 v1, v6, v1 969; GFX9-NEXT: v_and_b32_e32 v5, s6, v5 970; GFX9-NEXT: v_or_b32_e32 v1, v1, v8 971; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, 0, v5 972; GFX9-NEXT: v_mov_b32_e32 v8, 0 973; GFX9-NEXT: v_cndmask_b32_e32 v6, v1, v3, vcc 974; GFX9-NEXT: v_cmp_eq_u16_sdwa s[4:5], v5, v8 src0_sel:WORD_1 src1_sel:DWORD 975; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1 976; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3 977; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[4:5] 978; GFX9-NEXT: v_and_b32_e32 v3, 15, v4 979; GFX9-NEXT: v_and_b32_sdwa v5, v4, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 980; GFX9-NEXT: v_and_b32_e32 v3, v9, v3 981; GFX9-NEXT: v_lshl_or_b32 v3, v5, 16, v3 982; GFX9-NEXT: v_pk_lshrrev_b16 v5, v3, v2 983; GFX9-NEXT: v_pk_sub_i16 v3, 16, v3 op_sel_hi:[0,1] 984; GFX9-NEXT: v_pk_lshlrev_b16 
v0, v3, v0 985; GFX9-NEXT: v_and_b32_e32 v3, s6, v4 986; GFX9-NEXT: v_or_b32_e32 v0, v0, v5 987; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, 0, v3 988; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v2, vcc 989; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 990; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2 991; GFX9-NEXT: v_cmp_eq_u16_sdwa s[4:5], v3, v8 src0_sel:WORD_1 src1_sel:DWORD 992; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[4:5] 993; GFX9-NEXT: v_and_b32_e32 v2, v9, v4 994; GFX9-NEXT: v_lshl_or_b32 v0, v0, 16, v2 995; GFX9-NEXT: v_and_b32_e32 v2, v9, v6 996; GFX9-NEXT: v_lshl_or_b32 v1, v1, 16, v2 997; GFX9-NEXT: s_setpc_b64 s[30:31] 998; 999; R600-LABEL: v_fshr_v4i16: 1000; R600: ; %bb.0: 1001; R600-NEXT: CF_END 1002; R600-NEXT: PAD 1003 %ret = call <4 x i16> @llvm.fshr.v4i16(<4 x i16> %src0, <4 x i16> %src1, <4 x i16> %src2) 1004 ret <4 x i16> %ret 1005} 1006
; Exercises lowering of the i64 funnel-shift-right intrinsic. The generated checks show the expansion as a masked (63) shift amount, a 64-bit shift/or pair, and a cndmask select for the amount == 0 case. Do not hand-edit the generated assertions; regenerate with utils/update_llc_test_checks.py instead.
1007define i64 @v_fshr_i64(i64 %src0, i64 %src1, i64 %src2) { 1008; SI-LABEL: v_fshr_i64: 1009; SI: ; %bb.0: 1010; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1011; SI-NEXT: v_and_b32_e32 v4, 63, v4 1012; SI-NEXT: v_sub_i32_e32 v7, vcc, 64, v4 1013; SI-NEXT: v_lshr_b64 v[5:6], v[2:3], v4 1014; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], v7 1015; SI-NEXT: v_or_b32_e32 v0, v0, v5 1016; SI-NEXT: v_mov_b32_e32 v5, 0 1017; SI-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5] 1018; SI-NEXT: v_or_b32_e32 v1, v1, v6 1019; SI-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1020; SI-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 1021; SI-NEXT: s_setpc_b64 s[30:31] 1022; 1023; VI-LABEL: v_fshr_i64: 1024; VI: ; %bb.0: 1025; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1026; VI-NEXT: v_and_b32_e32 v4, 63, v4 1027; VI-NEXT: v_sub_u32_e32 v7, vcc, 64, v4 1028; VI-NEXT: v_lshrrev_b64 v[5:6], v4, v[2:3] 1029; VI-NEXT: v_lshlrev_b64 v[0:1], v7, v[0:1] 1030; VI-NEXT: v_or_b32_e32 v0, v0, v5 1031; VI-NEXT: v_mov_b32_e32 v5, 0 1032; VI-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5] 1033; VI-NEXT: v_or_b32_e32 v1, v1, v6 1034; VI-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1035; VI-NEXT:
v_cndmask_b32_e32 v1, v1, v3, vcc 1036; VI-NEXT: s_setpc_b64 s[30:31] 1037; 1038; GFX9-LABEL: v_fshr_i64: 1039; GFX9: ; %bb.0: 1040; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1041; GFX9-NEXT: v_and_b32_e32 v4, 63, v4 1042; GFX9-NEXT: v_sub_u32_e32 v7, 64, v4 1043; GFX9-NEXT: v_lshrrev_b64 v[5:6], v4, v[2:3] 1044; GFX9-NEXT: v_lshlrev_b64 v[0:1], v7, v[0:1] 1045; GFX9-NEXT: v_or_b32_e32 v0, v0, v5 1046; GFX9-NEXT: v_mov_b32_e32 v5, 0 1047; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5] 1048; GFX9-NEXT: v_or_b32_e32 v1, v1, v6 1049; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1050; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 1051; GFX9-NEXT: s_setpc_b64 s[30:31] 1052; 1053; R600-LABEL: v_fshr_i64: 1054; R600: ; %bb.0: 1055; R600-NEXT: CF_END 1056; R600-NEXT: PAD 1057 %ret = call i64 @llvm.fshr.i64(i64 %src0, i64 %src1, i64 %src2) 1058 ret i64 %ret 1059} 1060
; Exercises lowering of the <2 x i64> funnel-shift-right intrinsic; the generated checks show the scalar i64 expansion repeated per element. Do not hand-edit the generated assertions; regenerate with utils/update_llc_test_checks.py instead.
1061define <2 x i64> @v_fshr_v2i64(<2 x i64> %src0, <2 x i64> %src1, <2 x i64> %src2) { 1062; SI-LABEL: v_fshr_v2i64: 1063; SI: ; %bb.0: 1064; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1065; SI-NEXT: v_and_b32_e32 v8, 63, v8 1066; SI-NEXT: v_sub_i32_e32 v9, vcc, 64, v8 1067; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], v9 1068; SI-NEXT: v_lshr_b64 v[11:12], v[4:5], v8 1069; SI-NEXT: v_mov_b32_e32 v9, 0 1070; SI-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9] 1071; SI-NEXT: v_or_b32_e32 v0, v0, v11 1072; SI-NEXT: v_and_b32_e32 v8, 63, v10 1073; SI-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 1074; SI-NEXT: v_sub_i32_e64 v4, s[4:5], 64, v8 1075; SI-NEXT: v_or_b32_e32 v1, v1, v12 1076; SI-NEXT: v_lshr_b64 v[10:11], v[6:7], v8 1077; SI-NEXT: v_lshl_b64 v[2:3], v[2:3], v4 1078; SI-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 1079; SI-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9] 1080; SI-NEXT: v_or_b32_e32 v3, v3, v11 1081; SI-NEXT: v_or_b32_e32 v2, v2, v10 1082; SI-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc 1083; SI-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc 1084; SI-NEXT: s_setpc_b64 s[30:31] 1085; 1086; VI-LABEL: v_fshr_v2i64: 1087; VI: ;
%bb.0: 1088; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1089; VI-NEXT: v_and_b32_e32 v8, 63, v8 1090; VI-NEXT: v_sub_u32_e32 v9, vcc, 64, v8 1091; VI-NEXT: v_lshlrev_b64 v[0:1], v9, v[0:1] 1092; VI-NEXT: v_lshrrev_b64 v[11:12], v8, v[4:5] 1093; VI-NEXT: v_mov_b32_e32 v9, 0 1094; VI-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9] 1095; VI-NEXT: v_or_b32_e32 v0, v0, v11 1096; VI-NEXT: v_and_b32_e32 v8, 63, v10 1097; VI-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 1098; VI-NEXT: v_sub_u32_e64 v4, s[4:5], 64, v8 1099; VI-NEXT: v_or_b32_e32 v1, v1, v12 1100; VI-NEXT: v_lshrrev_b64 v[10:11], v8, v[6:7] 1101; VI-NEXT: v_lshlrev_b64 v[2:3], v4, v[2:3] 1102; VI-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 1103; VI-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9] 1104; VI-NEXT: v_or_b32_e32 v3, v3, v11 1105; VI-NEXT: v_or_b32_e32 v2, v2, v10 1106; VI-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc 1107; VI-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc 1108; VI-NEXT: s_setpc_b64 s[30:31] 1109; 1110; GFX9-LABEL: v_fshr_v2i64: 1111; GFX9: ; %bb.0: 1112; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1113; GFX9-NEXT: v_and_b32_e32 v8, 63, v8 1114; GFX9-NEXT: v_sub_u32_e32 v9, 64, v8 1115; GFX9-NEXT: v_lshlrev_b64 v[0:1], v9, v[0:1] 1116; GFX9-NEXT: v_lshrrev_b64 v[11:12], v8, v[4:5] 1117; GFX9-NEXT: v_mov_b32_e32 v9, 0 1118; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9] 1119; GFX9-NEXT: v_or_b32_e32 v0, v0, v11 1120; GFX9-NEXT: v_and_b32_e32 v8, 63, v10 1121; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 1122; GFX9-NEXT: v_sub_u32_e32 v4, 64, v8 1123; GFX9-NEXT: v_or_b32_e32 v1, v1, v12 1124; GFX9-NEXT: v_lshrrev_b64 v[10:11], v8, v[6:7] 1125; GFX9-NEXT: v_lshlrev_b64 v[2:3], v4, v[2:3] 1126; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 1127; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9] 1128; GFX9-NEXT: v_or_b32_e32 v3, v3, v11 1129; GFX9-NEXT: v_or_b32_e32 v2, v2, v10 1130; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc 1131; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc 1132; GFX9-NEXT: s_setpc_b64 s[30:31] 1133; 1134; 
R600-LABEL: v_fshr_v2i64: 1135; R600: ; %bb.0: 1136; R600-NEXT: CF_END 1137; R600-NEXT: PAD 1138 %ret = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %src0, <2 x i64> %src1, <2 x i64> %src2) 1139 ret <2 x i64> %ret 1140} 1141
; Exercises lowering of the i24 funnel-shift-right intrinsic. The non-power-of-two width forces a modulo-24 reduction of the shift amount, visible in the generated checks as a multiply-high by 0xaaaaaaab (magic-number division by 24) followed by mul/sub. Do not hand-edit the generated assertions; regenerate with utils/update_llc_test_checks.py instead.
1142define i24 @v_fshr_i24(i24 %src0, i24 %src1, i24 %src2) { 1143; SI-LABEL: v_fshr_i24: 1144; SI: ; %bb.0: 1145; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1146; SI-NEXT: s_mov_b32 s4, 0xffffff 1147; SI-NEXT: v_and_b32_e32 v2, s4, v2 1148; SI-NEXT: s_mov_b32 s5, 0xaaaaaaab 1149; SI-NEXT: v_mul_hi_u32 v3, v2, s5 1150; SI-NEXT: v_and_b32_e32 v4, s4, v1 1151; SI-NEXT: v_lshrrev_b32_e32 v3, 4, v3 1152; SI-NEXT: v_mul_lo_u32 v3, v3, 24 1153; SI-NEXT: v_sub_i32_e32 v2, vcc, v2, v3 1154; SI-NEXT: v_lshr_b32_e32 v3, v4, v2 1155; SI-NEXT: v_sub_i32_e32 v4, vcc, 24, v2 1156; SI-NEXT: v_and_b32_e32 v4, s4, v4 1157; SI-NEXT: v_lshl_b32_e32 v0, v0, v4 1158; SI-NEXT: v_or_b32_e32 v0, v0, v3 1159; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 1160; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1161; SI-NEXT: s_setpc_b64 s[30:31] 1162; 1163; VI-LABEL: v_fshr_i24: 1164; VI: ; %bb.0: 1165; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1166; VI-NEXT: s_mov_b32 s4, 0xffffff 1167; VI-NEXT: v_and_b32_e32 v2, s4, v2 1168; VI-NEXT: s_mov_b32 s5, 0xaaaaaaab 1169; VI-NEXT: v_mul_hi_u32 v3, v2, s5 1170; VI-NEXT: v_and_b32_e32 v4, s4, v1 1171; VI-NEXT: v_lshrrev_b32_e32 v3, 4, v3 1172; VI-NEXT: v_mul_lo_u32 v3, v3, 24 1173; VI-NEXT: v_sub_u32_e32 v2, vcc, v2, v3 1174; VI-NEXT: v_lshrrev_b32_e32 v3, v2, v4 1175; VI-NEXT: v_sub_u32_e32 v4, vcc, 24, v2 1176; VI-NEXT: v_and_b32_e32 v4, s4, v4 1177; VI-NEXT: v_lshlrev_b32_e32 v0, v4, v0 1178; VI-NEXT: v_or_b32_e32 v0, v0, v3 1179; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 1180; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1181; VI-NEXT: s_setpc_b64 s[30:31] 1182; 1183; GFX9-LABEL: v_fshr_i24: 1184; GFX9: ; %bb.0: 1185; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1186; GFX9-NEXT: s_mov_b32 s4, 0xffffff 1187; GFX9-NEXT:
v_and_b32_e32 v2, s4, v2 1188; GFX9-NEXT: s_mov_b32 s5, 0xaaaaaaab 1189; GFX9-NEXT: v_mul_hi_u32 v3, v2, s5 1190; GFX9-NEXT: v_and_b32_e32 v4, s4, v1 1191; GFX9-NEXT: v_lshrrev_b32_e32 v3, 4, v3 1192; GFX9-NEXT: v_mul_lo_u32 v3, v3, 24 1193; GFX9-NEXT: v_sub_u32_e32 v2, v2, v3 1194; GFX9-NEXT: v_lshrrev_b32_e32 v3, v2, v4 1195; GFX9-NEXT: v_sub_u32_e32 v4, 24, v2 1196; GFX9-NEXT: v_and_b32_e32 v4, s4, v4 1197; GFX9-NEXT: v_lshl_or_b32 v0, v0, v4, v3 1198; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 1199; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1200; GFX9-NEXT: s_setpc_b64 s[30:31] 1201; 1202; R600-LABEL: v_fshr_i24: 1203; R600: ; %bb.0: 1204; R600-NEXT: CF_END 1205; R600-NEXT: PAD 1206 %ret = call i24 @llvm.fshr.i24(i24 %src0, i24 %src1, i24 %src2) 1207 ret i24 %ret 1208} 1209
; Exercises lowering of the <2 x i24> funnel-shift-right intrinsic. The generated checks show the arguments being spilled/reloaded via buffer loads from the stack and the result stored byte/short-wise, since <2 x i24> is not a native register type. Do not hand-edit the generated assertions; regenerate with utils/update_llc_test_checks.py instead.
1210define <2 x i24> @v_fshr_v2i24(<2 x i24> %src0, <2 x i24> %src1, <2 x i24> %src2) { 1211; SI-LABEL: v_fshr_v2i24: 1212; SI: ; %bb.0: 1213; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1214; SI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 1215; SI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 1216; SI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 1217; SI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:4 1218; SI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:12 1219; SI-NEXT: buffer_load_dword v6, off, s[0:3], s32 1220; SI-NEXT: s_mov_b32 s4, 0xffffff 1221; SI-NEXT: s_mov_b32 s5, 0xaaaaaaab 1222; SI-NEXT: v_add_i32_e32 v7, vcc, 3, v0 1223; SI-NEXT: v_add_i32_e32 v8, vcc, 4, v0 1224; SI-NEXT: v_add_i32_e32 v9, vcc, 5, v0 1225; SI-NEXT: v_add_i32_e32 v10, vcc, 2, v0 1226; SI-NEXT: s_waitcnt vmcnt(5) 1227; SI-NEXT: v_and_b32_e32 v14, s4, v1 1228; SI-NEXT: s_waitcnt vmcnt(4) 1229; SI-NEXT: v_and_b32_e32 v2, s4, v2 1230; SI-NEXT: v_mul_hi_u32 v12, v2, s5 1231; SI-NEXT: s_waitcnt vmcnt(3) 1232; SI-NEXT: v_and_b32_e32 v3, s4, v3 1233; SI-NEXT: v_mul_hi_u32 v13, v3, s5 1234; SI-NEXT: s_waitcnt vmcnt(1) 1235; SI-NEXT: v_and_b32_e32 v11, s4, v5 1236; SI-NEXT:
v_lshrrev_b32_e32 v12, 4, v12 1237; SI-NEXT: v_mul_lo_u32 v12, v12, 24 1238; SI-NEXT: v_lshrrev_b32_e32 v13, 4, v13 1239; SI-NEXT: v_mul_lo_u32 v13, v13, 24 1240; SI-NEXT: v_sub_i32_e32 v2, vcc, v2, v12 1241; SI-NEXT: v_lshr_b32_e32 v12, v14, v2 1242; SI-NEXT: v_sub_i32_e32 v3, vcc, v3, v13 1243; SI-NEXT: v_sub_i32_e32 v13, vcc, 24, v2 1244; SI-NEXT: v_sub_i32_e32 v14, vcc, 24, v3 1245; SI-NEXT: v_and_b32_e32 v13, s4, v13 1246; SI-NEXT: s_waitcnt vmcnt(0) 1247; SI-NEXT: v_lshl_b32_e32 v6, v6, v13 1248; SI-NEXT: v_and_b32_e32 v14, 0xffffff, v14 1249; SI-NEXT: v_lshr_b32_e32 v11, v11, v3 1250; SI-NEXT: v_lshl_b32_e32 v4, v4, v14 1251; SI-NEXT: v_or_b32_e32 v6, v6, v12 1252; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 1253; SI-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc 1254; SI-NEXT: v_or_b32_e32 v4, v4, v11 1255; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 1256; SI-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc 1257; SI-NEXT: buffer_store_byte v2, v7, s[0:3], 0 offen 1258; SI-NEXT: buffer_store_short v1, v0, s[0:3], 0 offen 1259; SI-NEXT: v_lshrrev_b32_e32 v0, 8, v2 1260; SI-NEXT: s_waitcnt expcnt(1) 1261; SI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 1262; SI-NEXT: s_waitcnt expcnt(0) 1263; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 1264; SI-NEXT: buffer_store_byte v0, v8, s[0:3], 0 offen 1265; SI-NEXT: buffer_store_byte v2, v9, s[0:3], 0 offen 1266; SI-NEXT: buffer_store_byte v1, v10, s[0:3], 0 offen 1267; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1268; SI-NEXT: s_setpc_b64 s[30:31] 1269; 1270; VI-LABEL: v_fshr_v2i24: 1271; VI: ; %bb.0: 1272; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1273; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 1274; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 1275; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 1276; VI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:4 1277; VI-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:12 1278; VI-NEXT: buffer_load_dword v6, off, s[0:3], s32 1279; VI-NEXT: s_mov_b32 s4, 0xffffff 1280; 
VI-NEXT: s_mov_b32 s5, 0xaaaaaaab 1281; VI-NEXT: v_add_u32_e32 v7, vcc, 3, v0 1282; VI-NEXT: v_add_u32_e32 v8, vcc, 4, v0 1283; VI-NEXT: v_add_u32_e32 v9, vcc, 5, v0 1284; VI-NEXT: v_add_u32_e32 v10, vcc, 2, v0 1285; VI-NEXT: s_waitcnt vmcnt(5) 1286; VI-NEXT: v_and_b32_e32 v14, s4, v1 1287; VI-NEXT: s_waitcnt vmcnt(4) 1288; VI-NEXT: v_and_b32_e32 v2, s4, v2 1289; VI-NEXT: v_mul_hi_u32 v12, v2, s5 1290; VI-NEXT: s_waitcnt vmcnt(3) 1291; VI-NEXT: v_and_b32_e32 v3, s4, v3 1292; VI-NEXT: v_mul_hi_u32 v13, v3, s5 1293; VI-NEXT: s_waitcnt vmcnt(1) 1294; VI-NEXT: v_and_b32_e32 v11, s4, v5 1295; VI-NEXT: v_lshrrev_b32_e32 v12, 4, v12 1296; VI-NEXT: v_mul_lo_u32 v12, v12, 24 1297; VI-NEXT: v_lshrrev_b32_e32 v13, 4, v13 1298; VI-NEXT: v_mul_lo_u32 v13, v13, 24 1299; VI-NEXT: v_sub_u32_e32 v2, vcc, v2, v12 1300; VI-NEXT: v_lshrrev_b32_e32 v12, v2, v14 1301; VI-NEXT: v_sub_u32_e32 v3, vcc, v3, v13 1302; VI-NEXT: v_sub_u32_e32 v13, vcc, 24, v2 1303; VI-NEXT: v_sub_u32_e32 v14, vcc, 24, v3 1304; VI-NEXT: v_and_b32_e32 v13, s4, v13 1305; VI-NEXT: s_waitcnt vmcnt(0) 1306; VI-NEXT: v_lshlrev_b32_e32 v6, v13, v6 1307; VI-NEXT: v_and_b32_e32 v14, 0xffffff, v14 1308; VI-NEXT: v_lshrrev_b32_e32 v11, v3, v11 1309; VI-NEXT: v_lshlrev_b32_e32 v4, v14, v4 1310; VI-NEXT: v_or_b32_e32 v6, v6, v12 1311; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 1312; VI-NEXT: v_cndmask_b32_e32 v1, v6, v1, vcc 1313; VI-NEXT: v_or_b32_e32 v4, v4, v11 1314; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 1315; VI-NEXT: v_cndmask_b32_e32 v2, v4, v5, vcc 1316; VI-NEXT: buffer_store_byte v2, v7, s[0:3], 0 offen 1317; VI-NEXT: buffer_store_short v1, v0, s[0:3], 0 offen 1318; VI-NEXT: v_lshrrev_b32_e32 v0, 8, v2 1319; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 1320; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 1321; VI-NEXT: buffer_store_byte v0, v8, s[0:3], 0 offen 1322; VI-NEXT: buffer_store_byte v2, v9, s[0:3], 0 offen 1323; VI-NEXT: buffer_store_byte v1, v10, s[0:3], 0 offen 1324; VI-NEXT: s_waitcnt vmcnt(0) 1325; VI-NEXT: s_setpc_b64 
s[30:31] 1326; 1327; GFX9-LABEL: v_fshr_v2i24: 1328; GFX9: ; %bb.0: 1329; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1330; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:16 1331; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:20 1332; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:4 1333; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12 1334; GFX9-NEXT: buffer_load_dword v5, off, s[0:3], s32 1335; GFX9-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:8 1336; GFX9-NEXT: s_mov_b32 s4, 0xffffff 1337; GFX9-NEXT: s_mov_b32 s5, 0xaaaaaaab 1338; GFX9-NEXT: s_waitcnt vmcnt(5) 1339; GFX9-NEXT: v_and_b32_e32 v1, s4, v1 1340; GFX9-NEXT: v_mul_hi_u32 v6, v1, s5 1341; GFX9-NEXT: s_waitcnt vmcnt(4) 1342; GFX9-NEXT: v_and_b32_e32 v2, s4, v2 1343; GFX9-NEXT: v_mul_hi_u32 v7, v2, s5 1344; GFX9-NEXT: s_waitcnt vmcnt(2) 1345; GFX9-NEXT: v_and_b32_e32 v9, s4, v4 1346; GFX9-NEXT: v_lshrrev_b32_e32 v6, 4, v6 1347; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 1348; GFX9-NEXT: v_lshrrev_b32_e32 v7, 4, v7 1349; GFX9-NEXT: v_mul_lo_u32 v7, v7, 24 1350; GFX9-NEXT: s_waitcnt vmcnt(0) 1351; GFX9-NEXT: v_and_b32_e32 v10, s4, v8 1352; GFX9-NEXT: v_sub_u32_e32 v1, v1, v6 1353; GFX9-NEXT: v_lshrrev_b32_e32 v6, v1, v10 1354; GFX9-NEXT: v_sub_u32_e32 v2, v2, v7 1355; GFX9-NEXT: v_sub_u32_e32 v7, 24, v1 1356; GFX9-NEXT: v_sub_u32_e32 v10, 24, v2 1357; GFX9-NEXT: v_and_b32_e32 v7, s4, v7 1358; GFX9-NEXT: v_lshrrev_b32_e32 v9, v2, v9 1359; GFX9-NEXT: v_and_b32_e32 v10, 0xffffff, v10 1360; GFX9-NEXT: v_lshl_or_b32 v5, v5, v7, v6 1361; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 1362; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v8, vcc 1363; GFX9-NEXT: v_lshl_or_b32 v3, v3, v10, v9 1364; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 1365; GFX9-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc 1366; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v2 1367; GFX9-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:3 1368; GFX9-NEXT: buffer_store_byte_d16_hi v1, v0, s[0:3], 0 offen offset:2 1369; 
GFX9-NEXT: buffer_store_short v1, v0, s[0:3], 0 offen 1370; GFX9-NEXT: buffer_store_byte_d16_hi v2, v0, s[0:3], 0 offen offset:5 1371; GFX9-NEXT: buffer_store_byte v3, v0, s[0:3], 0 offen offset:4 1372; GFX9-NEXT: s_waitcnt vmcnt(0) 1373; GFX9-NEXT: s_setpc_b64 s[30:31] 1374; 1375; R600-LABEL: v_fshr_v2i24: 1376; R600: ; %bb.0: 1377; R600-NEXT: CF_END 1378; R600-NEXT: PAD 1379 %ret = call <2 x i24> @llvm.fshr.v2i24(<2 x i24> %src0, <2 x i24> %src1, <2 x i24> %src2) 1380 ret <2 x i24> %ret 1381} 1382