1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -march=amdgcn -mcpu=tahiti -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,GFX89,SI 3; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,GFX89,VI 4; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,GFX89,GFX9 5; RUN: llc < %s -march=r600 -mcpu=redwood -verify-machineinstrs | FileCheck %s -check-prefixes=GCN,R600 6 7declare i32 @llvm.fshr.i32(i32, i32, i32) 8declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) 9declare <3 x i32> @llvm.fshr.v3i32(<3 x i32>, <3 x i32>, <3 x i32>) 10declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) 11declare i16 @llvm.fshr.i16(i16, i16, i16) 12declare <2 x i16> @llvm.fshr.v2i16(<2 x i16>, <2 x i16>, <2 x i16>) 13declare <3 x i16> @llvm.fshr.v3i16(<3 x i16>, <3 x i16>, <3 x i16>) 14declare <4 x i16> @llvm.fshr.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) 15declare i64 @llvm.fshr.i64(i64, i64, i64) 16declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) 17declare i24 @llvm.fshr.i24(i24, i24, i24) 18declare <2 x i24> @llvm.fshr.v2i24(<2 x i24>, <2 x i24>, <2 x i24>) 19 20define amdgpu_kernel void @fshr_i32(i32 addrspace(1)* %in, i32 %x, i32 %y, i32 %z) { 21; SI-LABEL: fshr_i32: 22; SI: ; %bb.0: ; %entry 23; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 24; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xb 25; SI-NEXT: s_mov_b32 s7, 0xf000 26; SI-NEXT: s_mov_b32 s6, -1 27; SI-NEXT: s_waitcnt lgkmcnt(0) 28; SI-NEXT: v_mov_b32_e32 v0, s1 29; SI-NEXT: v_mov_b32_e32 v1, s2 30; SI-NEXT: v_alignbit_b32 v0, s0, v0, v1 31; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 32; SI-NEXT: s_endpgm 33; 34; VI-LABEL: fshr_i32: 35; VI: ; %bb.0: ; %entry 36; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 37; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2c 38; VI-NEXT: s_waitcnt lgkmcnt(0) 39; VI-NEXT: v_mov_b32_e32 v0, s1 40; 
VI-NEXT: v_mov_b32_e32 v1, s2 41; VI-NEXT: v_alignbit_b32 v2, s0, v0, v1 42; VI-NEXT: v_mov_b32_e32 v0, s4 43; VI-NEXT: v_mov_b32_e32 v1, s5 44; VI-NEXT: flat_store_dword v[0:1], v2 45; VI-NEXT: s_endpgm 46; 47; GFX9-LABEL: fshr_i32: 48; GFX9: ; %bb.0: ; %entry 49; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 50; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2c 51; GFX9-NEXT: s_waitcnt lgkmcnt(0) 52; GFX9-NEXT: v_mov_b32_e32 v0, s1 53; GFX9-NEXT: v_mov_b32_e32 v1, s2 54; GFX9-NEXT: v_alignbit_b32 v2, s0, v0, v1 55; GFX9-NEXT: v_mov_b32_e32 v0, s4 56; GFX9-NEXT: v_mov_b32_e32 v1, s5 57; GFX9-NEXT: global_store_dword v[0:1], v2, off 58; GFX9-NEXT: s_endpgm 59; 60; R600-LABEL: fshr_i32: 61; R600: ; %bb.0: ; %entry 62; R600-NEXT: ALU 2, @4, KC0[CB0:0-32], KC1[] 63; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1 64; R600-NEXT: CF_END 65; R600-NEXT: PAD 66; R600-NEXT: ALU clause starting at 4: 67; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, 68; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) 69; R600-NEXT: BIT_ALIGN_INT * T1.X, KC0[2].Z, KC0[2].W, KC0[3].X, 70entry: 71 %0 = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z) 72 store i32 %0, i32 addrspace(1)* %in 73 ret void 74} 75 76define amdgpu_kernel void @fshr_i32_imm(i32 addrspace(1)* %in, i32 %x, i32 %y) { 77; SI-LABEL: fshr_i32_imm: 78; SI: ; %bb.0: ; %entry 79; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 80; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb 81; SI-NEXT: s_mov_b32 s7, 0xf000 82; SI-NEXT: s_mov_b32 s6, -1 83; SI-NEXT: s_waitcnt lgkmcnt(0) 84; SI-NEXT: v_mov_b32_e32 v0, s1 85; SI-NEXT: v_alignbit_b32 v0, s0, v0, 7 86; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 87; SI-NEXT: s_endpgm 88; 89; VI-LABEL: fshr_i32_imm: 90; VI: ; %bb.0: ; %entry 91; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 92; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c 93; VI-NEXT: s_waitcnt lgkmcnt(0) 94; VI-NEXT: v_mov_b32_e32 v0, s1 95; VI-NEXT: v_alignbit_b32 v2, s0, v0, 7 96; VI-NEXT: v_mov_b32_e32 v0, s2 97; VI-NEXT: 
v_mov_b32_e32 v1, s3 98; VI-NEXT: flat_store_dword v[0:1], v2 99; VI-NEXT: s_endpgm 100; 101; GFX9-LABEL: fshr_i32_imm: 102; GFX9: ; %bb.0: ; %entry 103; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 104; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x2c 105; GFX9-NEXT: s_waitcnt lgkmcnt(0) 106; GFX9-NEXT: v_mov_b32_e32 v0, s1 107; GFX9-NEXT: v_alignbit_b32 v2, s0, v0, 7 108; GFX9-NEXT: v_mov_b32_e32 v0, s2 109; GFX9-NEXT: v_mov_b32_e32 v1, s3 110; GFX9-NEXT: global_store_dword v[0:1], v2, off 111; GFX9-NEXT: s_endpgm 112; 113; R600-LABEL: fshr_i32_imm: 114; R600: ; %bb.0: ; %entry 115; R600-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[] 116; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1 117; R600-NEXT: CF_END 118; R600-NEXT: PAD 119; R600-NEXT: ALU clause starting at 4: 120; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, 121; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) 122; R600-NEXT: BIT_ALIGN_INT * T1.X, KC0[2].Z, KC0[2].W, literal.x, 123; R600-NEXT: 7(9.809089e-45), 0(0.000000e+00) 124entry: 125 %0 = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 7) 126 store i32 %0, i32 addrspace(1)* %in 127 ret void 128} 129 130define amdgpu_kernel void @fshr_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) { 131; SI-LABEL: fshr_v2i32: 132; SI: ; %bb.0: ; %entry 133; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 134; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb 135; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd 136; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xf 137; SI-NEXT: s_mov_b32 s7, 0xf000 138; SI-NEXT: s_mov_b32 s6, -1 139; SI-NEXT: s_waitcnt lgkmcnt(0) 140; SI-NEXT: v_mov_b32_e32 v0, s9 141; SI-NEXT: s_and_b32 s1, s1, 31 142; SI-NEXT: v_mov_b32_e32 v1, s1 143; SI-NEXT: s_and_b32 s0, s0, 31 144; SI-NEXT: v_alignbit_b32 v1, s3, v0, v1 145; SI-NEXT: v_cmp_eq_u32_e64 vcc, s1, 0 146; SI-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc 147; SI-NEXT: v_mov_b32_e32 v0, s8 148; SI-NEXT: v_mov_b32_e32 v2, s0 149; SI-NEXT: v_alignbit_b32 v2, s2, v0, v2 150; SI-NEXT: 
v_cmp_eq_u32_e64 vcc, s0, 0 151; SI-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 152; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 153; SI-NEXT: s_endpgm 154; 155; VI-LABEL: fshr_v2i32: 156; VI: ; %bb.0: ; %entry 157; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 158; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c 159; VI-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x34 160; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x3c 161; VI-NEXT: s_waitcnt lgkmcnt(0) 162; VI-NEXT: v_mov_b32_e32 v0, s7 163; VI-NEXT: s_and_b32 s1, s1, 31 164; VI-NEXT: v_mov_b32_e32 v1, s1 165; VI-NEXT: s_and_b32 s0, s0, 31 166; VI-NEXT: v_alignbit_b32 v1, s5, v0, v1 167; VI-NEXT: v_cmp_eq_u32_e64 vcc, s1, 0 168; VI-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc 169; VI-NEXT: v_mov_b32_e32 v0, s6 170; VI-NEXT: v_mov_b32_e32 v2, s0 171; VI-NEXT: v_alignbit_b32 v2, s4, v0, v2 172; VI-NEXT: v_cmp_eq_u32_e64 vcc, s0, 0 173; VI-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 174; VI-NEXT: v_mov_b32_e32 v2, s2 175; VI-NEXT: v_mov_b32_e32 v3, s3 176; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 177; VI-NEXT: s_endpgm 178; 179; GFX9-LABEL: fshr_v2i32: 180; GFX9: ; %bb.0: ; %entry 181; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 182; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c 183; GFX9-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x34 184; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x3c 185; GFX9-NEXT: s_waitcnt lgkmcnt(0) 186; GFX9-NEXT: v_mov_b32_e32 v0, s7 187; GFX9-NEXT: s_and_b32 s1, s1, 31 188; GFX9-NEXT: v_mov_b32_e32 v1, s1 189; GFX9-NEXT: s_and_b32 s0, s0, 31 190; GFX9-NEXT: v_alignbit_b32 v1, s5, v0, v1 191; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s1, 0 192; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc 193; GFX9-NEXT: v_mov_b32_e32 v0, s6 194; GFX9-NEXT: v_mov_b32_e32 v2, s0 195; GFX9-NEXT: v_alignbit_b32 v2, s4, v0, v2 196; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s0, 0 197; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 198; GFX9-NEXT: v_mov_b32_e32 v2, s2 199; GFX9-NEXT: v_mov_b32_e32 v3, s3 200; GFX9-NEXT: global_store_dwordx2 v[2:3], 
v[0:1], off 201; GFX9-NEXT: s_endpgm 202; 203; R600-LABEL: fshr_v2i32: 204; R600: ; %bb.0: ; %entry 205; R600-NEXT: ALU 11, @4, KC0[CB0:0-32], KC1[] 206; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 207; R600-NEXT: CF_END 208; R600-NEXT: PAD 209; R600-NEXT: ALU clause starting at 4: 210; R600-NEXT: AND_INT * T0.W, KC0[4].X, literal.x, 211; R600-NEXT: 31(4.344025e-44), 0(0.000000e+00) 212; R600-NEXT: BIT_ALIGN_INT T1.W, KC0[3].X, KC0[3].Z, PV.W, 213; R600-NEXT: SETE_INT * T0.W, PV.W, 0.0, 214; R600-NEXT: CNDE_INT T0.Y, PS, PV.W, KC0[3].Z, 215; R600-NEXT: AND_INT * T0.W, KC0[3].W, literal.x, 216; R600-NEXT: 31(4.344025e-44), 0(0.000000e+00) 217; R600-NEXT: BIT_ALIGN_INT T1.W, KC0[2].W, KC0[3].Y, PV.W, 218; R600-NEXT: SETE_INT * T0.W, PV.W, 0.0, 219; R600-NEXT: CNDE_INT T0.X, PS, PV.W, KC0[3].Y, 220; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 221; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) 222entry: 223 %0 = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %z) 224 store <2 x i32> %0, <2 x i32> addrspace(1)* %in 225 ret void 226} 227 228define amdgpu_kernel void @fshr_v2i32_imm(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y) { 229; SI-LABEL: fshr_v2i32_imm: 230; SI: ; %bb.0: ; %entry 231; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 232; SI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb 233; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd 234; SI-NEXT: s_mov_b32 s7, 0xf000 235; SI-NEXT: s_mov_b32 s6, -1 236; SI-NEXT: s_waitcnt lgkmcnt(0) 237; SI-NEXT: v_mov_b32_e32 v0, s1 238; SI-NEXT: v_alignbit_b32 v1, s3, v0, 9 239; SI-NEXT: v_mov_b32_e32 v0, s0 240; SI-NEXT: v_alignbit_b32 v0, s2, v0, 7 241; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 242; SI-NEXT: s_endpgm 243; 244; VI-LABEL: fshr_v2i32_imm: 245; VI: ; %bb.0: ; %entry 246; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 247; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c 248; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 249; VI-NEXT: s_waitcnt lgkmcnt(0) 250; VI-NEXT: 
v_mov_b32_e32 v0, s1 251; VI-NEXT: v_mov_b32_e32 v2, s0 252; VI-NEXT: v_alignbit_b32 v1, s5, v0, 9 253; VI-NEXT: v_alignbit_b32 v0, s4, v2, 7 254; VI-NEXT: v_mov_b32_e32 v2, s2 255; VI-NEXT: v_mov_b32_e32 v3, s3 256; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 257; VI-NEXT: s_endpgm 258; 259; GFX9-LABEL: fshr_v2i32_imm: 260; GFX9: ; %bb.0: ; %entry 261; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 262; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c 263; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 264; GFX9-NEXT: s_waitcnt lgkmcnt(0) 265; GFX9-NEXT: v_mov_b32_e32 v0, s1 266; GFX9-NEXT: v_mov_b32_e32 v2, s0 267; GFX9-NEXT: v_alignbit_b32 v1, s5, v0, 9 268; GFX9-NEXT: v_alignbit_b32 v0, s4, v2, 7 269; GFX9-NEXT: v_mov_b32_e32 v2, s2 270; GFX9-NEXT: v_mov_b32_e32 v3, s3 271; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off 272; GFX9-NEXT: s_endpgm 273; 274; R600-LABEL: fshr_v2i32_imm: 275; R600: ; %bb.0: ; %entry 276; R600-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[] 277; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1 278; R600-NEXT: CF_END 279; R600-NEXT: PAD 280; R600-NEXT: ALU clause starting at 4: 281; R600-NEXT: BIT_ALIGN_INT * T0.Y, KC0[3].X, KC0[3].Z, literal.x, 282; R600-NEXT: 9(1.261169e-44), 0(0.000000e+00) 283; R600-NEXT: BIT_ALIGN_INT * T0.X, KC0[2].W, KC0[3].Y, literal.x, 284; R600-NEXT: 7(9.809089e-45), 0(0.000000e+00) 285; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 286; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) 287entry: 288 %0 = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 7, i32 9>) 289 store <2 x i32> %0, <2 x i32> addrspace(1)* %in 290 ret void 291} 292 293define amdgpu_kernel void @fshr_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { 294; SI-LABEL: fshr_v4i32: 295; SI: ; %bb.0: ; %entry 296; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 297; SI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0xd 298; SI-NEXT: s_load_dwordx4 s[12:15], s[0:1], 0x11 299; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 
0x15 300; SI-NEXT: s_mov_b32 s7, 0xf000 301; SI-NEXT: s_mov_b32 s6, -1 302; SI-NEXT: s_waitcnt lgkmcnt(0) 303; SI-NEXT: v_mov_b32_e32 v0, s15 304; SI-NEXT: s_and_b32 s3, s3, 31 305; SI-NEXT: v_mov_b32_e32 v1, s3 306; SI-NEXT: v_alignbit_b32 v1, s11, v0, v1 307; SI-NEXT: v_cmp_eq_u32_e64 vcc, s3, 0 308; SI-NEXT: s_and_b32 s2, s2, 31 309; SI-NEXT: v_cndmask_b32_e32 v3, v1, v0, vcc 310; SI-NEXT: v_mov_b32_e32 v0, s14 311; SI-NEXT: v_mov_b32_e32 v1, s2 312; SI-NEXT: v_alignbit_b32 v1, s10, v0, v1 313; SI-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0 314; SI-NEXT: s_and_b32 s1, s1, 31 315; SI-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc 316; SI-NEXT: v_mov_b32_e32 v0, s13 317; SI-NEXT: v_mov_b32_e32 v1, s1 318; SI-NEXT: s_and_b32 s0, s0, 31 319; SI-NEXT: v_alignbit_b32 v1, s9, v0, v1 320; SI-NEXT: v_cmp_eq_u32_e64 vcc, s1, 0 321; SI-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc 322; SI-NEXT: v_mov_b32_e32 v0, s12 323; SI-NEXT: v_mov_b32_e32 v4, s0 324; SI-NEXT: v_alignbit_b32 v4, s8, v0, v4 325; SI-NEXT: v_cmp_eq_u32_e64 vcc, s0, 0 326; SI-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc 327; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 328; SI-NEXT: s_endpgm 329; 330; VI-LABEL: fshr_v4i32: 331; VI: ; %bb.0: ; %entry 332; VI-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x24 333; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 334; VI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x44 335; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x54 336; VI-NEXT: s_waitcnt lgkmcnt(0) 337; VI-NEXT: v_mov_b32_e32 v0, s11 338; VI-NEXT: s_and_b32 s3, s3, 31 339; VI-NEXT: v_mov_b32_e32 v1, s3 340; VI-NEXT: v_alignbit_b32 v1, s7, v0, v1 341; VI-NEXT: v_cmp_eq_u32_e64 vcc, s3, 0 342; VI-NEXT: s_and_b32 s2, s2, 31 343; VI-NEXT: v_cndmask_b32_e32 v3, v1, v0, vcc 344; VI-NEXT: v_mov_b32_e32 v0, s10 345; VI-NEXT: v_mov_b32_e32 v1, s2 346; VI-NEXT: v_alignbit_b32 v1, s6, v0, v1 347; VI-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0 348; VI-NEXT: s_and_b32 s1, s1, 31 349; VI-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc 350; VI-NEXT: v_mov_b32_e32 v0, s9 
351; VI-NEXT: v_mov_b32_e32 v1, s1 352; VI-NEXT: s_and_b32 s0, s0, 31 353; VI-NEXT: v_alignbit_b32 v1, s5, v0, v1 354; VI-NEXT: v_cmp_eq_u32_e64 vcc, s1, 0 355; VI-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc 356; VI-NEXT: v_mov_b32_e32 v0, s8 357; VI-NEXT: v_mov_b32_e32 v4, s0 358; VI-NEXT: v_alignbit_b32 v4, s4, v0, v4 359; VI-NEXT: v_cmp_eq_u32_e64 vcc, s0, 0 360; VI-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc 361; VI-NEXT: v_mov_b32_e32 v4, s12 362; VI-NEXT: v_mov_b32_e32 v5, s13 363; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 364; VI-NEXT: s_endpgm 365; 366; GFX9-LABEL: fshr_v4i32: 367; GFX9: ; %bb.0: ; %entry 368; GFX9-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x24 369; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 370; GFX9-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x44 371; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x54 372; GFX9-NEXT: s_waitcnt lgkmcnt(0) 373; GFX9-NEXT: v_mov_b32_e32 v0, s11 374; GFX9-NEXT: s_and_b32 s3, s3, 31 375; GFX9-NEXT: v_mov_b32_e32 v1, s3 376; GFX9-NEXT: v_alignbit_b32 v1, s7, v0, v1 377; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s3, 0 378; GFX9-NEXT: s_and_b32 s2, s2, 31 379; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v0, vcc 380; GFX9-NEXT: v_mov_b32_e32 v0, s10 381; GFX9-NEXT: v_mov_b32_e32 v1, s2 382; GFX9-NEXT: v_alignbit_b32 v1, s6, v0, v1 383; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s2, 0 384; GFX9-NEXT: s_and_b32 s1, s1, 31 385; GFX9-NEXT: v_cndmask_b32_e32 v2, v1, v0, vcc 386; GFX9-NEXT: v_mov_b32_e32 v0, s9 387; GFX9-NEXT: v_mov_b32_e32 v1, s1 388; GFX9-NEXT: s_and_b32 s0, s0, 31 389; GFX9-NEXT: v_alignbit_b32 v1, s5, v0, v1 390; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s1, 0 391; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v0, vcc 392; GFX9-NEXT: v_mov_b32_e32 v0, s8 393; GFX9-NEXT: v_mov_b32_e32 v4, s0 394; GFX9-NEXT: v_alignbit_b32 v4, s4, v0, v4 395; GFX9-NEXT: v_cmp_eq_u32_e64 vcc, s0, 0 396; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc 397; GFX9-NEXT: v_mov_b32_e32 v4, s12 398; GFX9-NEXT: v_mov_b32_e32 v5, s13 399; GFX9-NEXT: global_store_dwordx4 v[4:5], v[0:3], 
off 400; GFX9-NEXT: s_endpgm 401; 402; R600-LABEL: fshr_v4i32: 403; R600: ; %bb.0: ; %entry 404; R600-NEXT: ALU 20, @4, KC0[CB0:0-32], KC1[] 405; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1 406; R600-NEXT: CF_END 407; R600-NEXT: PAD 408; R600-NEXT: ALU clause starting at 4: 409; R600-NEXT: AND_INT T0.W, KC0[5].Z, literal.x, 410; R600-NEXT: AND_INT * T1.W, KC0[6].X, literal.x, 411; R600-NEXT: 31(4.344025e-44), 0(0.000000e+00) 412; R600-NEXT: SETE_INT T0.Z, PS, 0.0, 413; R600-NEXT: BIT_ALIGN_INT * T1.W, KC0[4].X, KC0[5].X, PS, 414; R600-NEXT: AND_INT * T2.W, KC0[5].W, literal.x, 415; R600-NEXT: 31(4.344025e-44), 0(0.000000e+00) 416; R600-NEXT: SETE_INT T1.Z, PV.W, 0.0, 417; R600-NEXT: BIT_ALIGN_INT * T2.W, KC0[3].W, KC0[4].W, PV.W, 418; R600-NEXT: CNDE_INT * T1.W, T0.Z, T1.W, KC0[5].X, 419; R600-NEXT: CNDE_INT T1.Z, T1.Z, T2.W, KC0[4].W, 420; R600-NEXT: BIT_ALIGN_INT T2.W, KC0[3].Z, KC0[4].Z, T0.W, 421; R600-NEXT: SETE_INT * T0.W, T0.W, 0.0, 422; R600-NEXT: CNDE_INT T1.Y, PS, PV.W, KC0[4].Z, 423; R600-NEXT: AND_INT * T0.W, KC0[5].Y, literal.x, 424; R600-NEXT: 31(4.344025e-44), 0(0.000000e+00) 425; R600-NEXT: BIT_ALIGN_INT T2.W, KC0[3].Y, KC0[4].Y, PV.W, 426; R600-NEXT: SETE_INT * T0.W, PV.W, 0.0, 427; R600-NEXT: CNDE_INT T1.X, PS, PV.W, KC0[4].Y, 428; R600-NEXT: LSHR * T0.X, KC0[2].Y, literal.x, 429; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) 430entry: 431 %0 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) 432 store <4 x i32> %0, <4 x i32> addrspace(1)* %in 433 ret void 434} 435 436define amdgpu_kernel void @fshr_v4i32_imm(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y) { 437; SI-LABEL: fshr_v4i32_imm: 438; SI: ; %bb.0: ; %entry 439; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 440; SI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0xd 441; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x11 442; SI-NEXT: s_mov_b32 s7, 0xf000 443; SI-NEXT: s_mov_b32 s6, -1 444; SI-NEXT: s_waitcnt lgkmcnt(0) 445; SI-NEXT: v_mov_b32_e32 v0, s3 446; 
SI-NEXT: v_alignbit_b32 v3, s11, v0, 1 447; SI-NEXT: v_mov_b32_e32 v0, s2 448; SI-NEXT: v_alignbit_b32 v2, s10, v0, 9 449; SI-NEXT: v_mov_b32_e32 v0, s1 450; SI-NEXT: v_alignbit_b32 v1, s9, v0, 7 451; SI-NEXT: v_mov_b32_e32 v0, s0 452; SI-NEXT: v_alignbit_b32 v0, s8, v0, 1 453; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0 454; SI-NEXT: s_endpgm 455; 456; VI-LABEL: fshr_v4i32_imm: 457; VI: ; %bb.0: ; %entry 458; VI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24 459; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 460; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x44 461; VI-NEXT: s_waitcnt lgkmcnt(0) 462; VI-NEXT: v_mov_b32_e32 v4, s8 463; VI-NEXT: v_mov_b32_e32 v5, s9 464; VI-NEXT: v_mov_b32_e32 v0, s3 465; VI-NEXT: v_mov_b32_e32 v1, s2 466; VI-NEXT: v_alignbit_b32 v3, s7, v0, 1 467; VI-NEXT: v_mov_b32_e32 v0, s1 468; VI-NEXT: v_alignbit_b32 v2, s6, v1, 9 469; VI-NEXT: v_alignbit_b32 v1, s5, v0, 7 470; VI-NEXT: v_mov_b32_e32 v0, s0 471; VI-NEXT: v_alignbit_b32 v0, s4, v0, 1 472; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] 473; VI-NEXT: s_endpgm 474; 475; GFX9-LABEL: fshr_v4i32_imm: 476; GFX9: ; %bb.0: ; %entry 477; GFX9-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24 478; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34 479; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x44 480; GFX9-NEXT: s_waitcnt lgkmcnt(0) 481; GFX9-NEXT: v_mov_b32_e32 v4, s8 482; GFX9-NEXT: v_mov_b32_e32 v5, s9 483; GFX9-NEXT: v_mov_b32_e32 v0, s3 484; GFX9-NEXT: v_mov_b32_e32 v1, s2 485; GFX9-NEXT: v_alignbit_b32 v3, s7, v0, 1 486; GFX9-NEXT: v_mov_b32_e32 v0, s1 487; GFX9-NEXT: v_alignbit_b32 v2, s6, v1, 9 488; GFX9-NEXT: v_alignbit_b32 v1, s5, v0, 7 489; GFX9-NEXT: v_mov_b32_e32 v0, s0 490; GFX9-NEXT: v_alignbit_b32 v0, s4, v0, 1 491; GFX9-NEXT: global_store_dwordx4 v[4:5], v[0:3], off 492; GFX9-NEXT: s_endpgm 493; 494; R600-LABEL: fshr_v4i32_imm: 495; R600: ; %bb.0: ; %entry 496; R600-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[] 497; R600-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1 498; R600-NEXT: CF_END 
499; R600-NEXT: PAD 500; R600-NEXT: ALU clause starting at 4: 501; R600-NEXT: BIT_ALIGN_INT * T0.W, KC0[4].X, KC0[5].X, 1, 502; R600-NEXT: BIT_ALIGN_INT * T0.Z, KC0[3].W, KC0[4].W, literal.x, 503; R600-NEXT: 9(1.261169e-44), 0(0.000000e+00) 504; R600-NEXT: BIT_ALIGN_INT * T0.Y, KC0[3].Z, KC0[4].Z, literal.x, 505; R600-NEXT: 7(9.809089e-45), 0(0.000000e+00) 506; R600-NEXT: BIT_ALIGN_INT * T0.X, KC0[3].Y, KC0[4].Y, 1, 507; R600-NEXT: LSHR * T1.X, KC0[2].Y, literal.x, 508; R600-NEXT: 2(2.802597e-45), 0(0.000000e+00) 509entry: 510 %0 = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 1, i32 7, i32 9, i32 33>) 511 store <4 x i32> %0, <4 x i32> addrspace(1)* %in 512 ret void 513} 514 515define i32 @v_fshr_i32(i32 %src0, i32 %src1, i32 %src2) { 516; GFX89-LABEL: v_fshr_i32: 517; GFX89: ; %bb.0: 518; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 519; GFX89-NEXT: v_alignbit_b32 v0, v0, v1, v2 520; GFX89-NEXT: s_setpc_b64 s[30:31] 521; 522; R600-LABEL: v_fshr_i32: 523; R600: ; %bb.0: 524; R600-NEXT: CF_END 525; R600-NEXT: PAD 526 %ret = call i32 @llvm.fshr.i32(i32 %src0, i32 %src1, i32 %src2) 527 ret i32 %ret 528} 529 530define <2 x i32> @v_fshr_v2i32(<2 x i32> %src0, <2 x i32> %src1, <2 x i32> %src2) { 531; GFX89-LABEL: v_fshr_v2i32: 532; GFX89: ; %bb.0: 533; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 534; GFX89-NEXT: v_and_b32_e32 v4, 31, v4 535; GFX89-NEXT: v_alignbit_b32 v0, v0, v2, v4 536; GFX89-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 537; GFX89-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 538; GFX89-NEXT: v_and_b32_e32 v2, 31, v5 539; GFX89-NEXT: v_alignbit_b32 v1, v1, v3, v2 540; GFX89-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 541; GFX89-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 542; GFX89-NEXT: s_setpc_b64 s[30:31] 543; 544; R600-LABEL: v_fshr_v2i32: 545; R600: ; %bb.0: 546; R600-NEXT: CF_END 547; R600-NEXT: PAD 548 %ret = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %src0, <2 x i32> %src1, <2 x i32> %src2) 549 ret <2 x i32> %ret 550} 551 552define 
<3 x i32> @v_fshr_v3i32(<3 x i32> %src0, <3 x i32> %src1, <3 x i32> %src2) { 553; GFX89-LABEL: v_fshr_v3i32: 554; GFX89: ; %bb.0: 555; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 556; GFX89-NEXT: v_and_b32_e32 v6, 31, v6 557; GFX89-NEXT: v_alignbit_b32 v0, v0, v3, v6 558; GFX89-NEXT: v_cmp_eq_u32_e32 vcc, 0, v6 559; GFX89-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 560; GFX89-NEXT: v_and_b32_e32 v3, 31, v7 561; GFX89-NEXT: v_alignbit_b32 v1, v1, v4, v3 562; GFX89-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 563; GFX89-NEXT: v_and_b32_e32 v3, 31, v8 564; GFX89-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 565; GFX89-NEXT: v_alignbit_b32 v2, v2, v5, v3 566; GFX89-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 567; GFX89-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc 568; GFX89-NEXT: s_setpc_b64 s[30:31] 569; 570; R600-LABEL: v_fshr_v3i32: 571; R600: ; %bb.0: 572; R600-NEXT: CF_END 573; R600-NEXT: PAD 574 %ret = call <3 x i32> @llvm.fshr.v3i32(<3 x i32> %src0, <3 x i32> %src1, <3 x i32> %src2) 575 ret <3 x i32> %ret 576} 577 578define <4 x i32> @v_fshr_v4i32(<4 x i32> %src0, <4 x i32> %src1, <4 x i32> %src2) { 579; GFX89-LABEL: v_fshr_v4i32: 580; GFX89: ; %bb.0: 581; GFX89-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 582; GFX89-NEXT: v_and_b32_e32 v8, 31, v8 583; GFX89-NEXT: v_alignbit_b32 v0, v0, v4, v8 584; GFX89-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8 585; GFX89-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 586; GFX89-NEXT: v_and_b32_e32 v4, 31, v9 587; GFX89-NEXT: v_alignbit_b32 v1, v1, v5, v4 588; GFX89-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 589; GFX89-NEXT: v_and_b32_e32 v4, 31, v10 590; GFX89-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 591; GFX89-NEXT: v_alignbit_b32 v2, v2, v6, v4 592; GFX89-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 593; GFX89-NEXT: v_and_b32_e32 v4, 31, v11 594; GFX89-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc 595; GFX89-NEXT: v_alignbit_b32 v3, v3, v7, v4 596; GFX89-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 597; GFX89-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc 598; GFX89-NEXT: s_setpc_b64 s[30:31] 599; 600; 
R600-LABEL: v_fshr_v4i32: 601; R600: ; %bb.0: 602; R600-NEXT: CF_END 603; R600-NEXT: PAD 604 %ret = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %src0, <4 x i32> %src1, <4 x i32> %src2) 605 ret <4 x i32> %ret 606} 607 608define i16 @v_fshr_i16(i16 %src0, i16 %src1, i16 %src2) { 609; SI-LABEL: v_fshr_i16: 610; SI: ; %bb.0: 611; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 612; SI-NEXT: v_and_b32_e32 v2, 15, v2 613; SI-NEXT: v_and_b32_e32 v3, 0xffff, v1 614; SI-NEXT: v_sub_i32_e32 v4, vcc, 16, v2 615; SI-NEXT: v_lshr_b32_e32 v3, v3, v2 616; SI-NEXT: v_lshl_b32_e32 v0, v0, v4 617; SI-NEXT: v_or_b32_e32 v0, v0, v3 618; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 619; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 620; SI-NEXT: s_setpc_b64 s[30:31] 621; 622; VI-LABEL: v_fshr_i16: 623; VI: ; %bb.0: 624; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 625; VI-NEXT: v_and_b32_e32 v2, 15, v2 626; VI-NEXT: v_sub_u16_e32 v4, 16, v2 627; VI-NEXT: v_lshrrev_b16_e32 v3, v2, v1 628; VI-NEXT: v_lshlrev_b16_e32 v0, v4, v0 629; VI-NEXT: v_or_b32_e32 v0, v0, v3 630; VI-NEXT: v_cmp_eq_u16_e32 vcc, 0, v2 631; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 632; VI-NEXT: s_setpc_b64 s[30:31] 633; 634; GFX9-LABEL: v_fshr_i16: 635; GFX9: ; %bb.0: 636; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 637; GFX9-NEXT: v_and_b32_e32 v2, 15, v2 638; GFX9-NEXT: v_sub_u16_e32 v4, 16, v2 639; GFX9-NEXT: v_lshrrev_b16_e32 v3, v2, v1 640; GFX9-NEXT: v_lshlrev_b16_e32 v0, v4, v0 641; GFX9-NEXT: v_or_b32_e32 v0, v0, v3 642; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, 0, v2 643; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 644; GFX9-NEXT: s_setpc_b64 s[30:31] 645; 646; R600-LABEL: v_fshr_i16: 647; R600: ; %bb.0: 648; R600-NEXT: CF_END 649; R600-NEXT: PAD 650 %ret = call i16 @llvm.fshr.i16(i16 %src0, i16 %src1, i16 %src2) 651 ret i16 %ret 652} 653 654define <2 x i16> @v_fshr_v2i16(<2 x i16> %src0, <2 x i16> %src1, <2 x i16> %src2) { 655; SI-LABEL: v_fshr_v2i16: 656; SI: ; %bb.0: 657; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 658; SI-NEXT: s_mov_b32 s4, 0xffff 659; SI-NEXT: v_and_b32_e32 v5, 15, v5 660; SI-NEXT: v_and_b32_e32 v7, s4, v3 661; SI-NEXT: v_sub_i32_e32 v8, vcc, 16, v5 662; SI-NEXT: v_lshr_b32_e32 v7, v7, v5 663; SI-NEXT: v_lshl_b32_e32 v1, v1, v8 664; SI-NEXT: v_or_b32_e32 v1, v1, v7 665; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5 666; SI-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 667; SI-NEXT: v_and_b32_e32 v3, 15, v4 668; SI-NEXT: v_sub_i32_e32 v5, vcc, 16, v3 669; SI-NEXT: v_and_b32_e32 v6, s4, v2 670; SI-NEXT: v_lshr_b32_e32 v4, v6, v3 671; SI-NEXT: v_lshl_b32_e32 v0, v0, v5 672; SI-NEXT: v_or_b32_e32 v0, v0, v4 673; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 674; SI-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 675; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 676; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0 677; SI-NEXT: v_or_b32_e32 v0, v0, v1 678; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 679; SI-NEXT: s_setpc_b64 s[30:31] 680; 681; VI-LABEL: v_fshr_v2i16: 682; VI: ; %bb.0: 683; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 684; VI-NEXT: v_and_b32_e32 v3, 0xf000f, v2 685; VI-NEXT: v_lshrrev_b32_e32 v5, 16, v1 686; VI-NEXT: v_bfe_u32 v2, v2, 16, 4 687; VI-NEXT: v_lshrrev_b16_e32 v4, v3, v1 688; VI-NEXT: v_lshrrev_b16_sdwa v6, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 689; VI-NEXT: v_or_b32_e32 v4, v4, v6 690; VI-NEXT: v_sub_u16_e32 v6, 16, v2 691; VI-NEXT: v_sub_u16_e32 v7, 16, v3 692; VI-NEXT: v_lshlrev_b16_sdwa v6, v6, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 693; VI-NEXT: v_lshlrev_b16_e32 v0, v7, v0 694; VI-NEXT: v_or_b32_e32 v0, v0, v6 695; VI-NEXT: v_or_b32_e32 v0, v0, v4 696; VI-NEXT: v_cmp_eq_u16_e32 vcc, 0, v3 697; VI-NEXT: v_cndmask_b32_e32 v1, v0, v1, vcc 698; VI-NEXT: v_lshrrev_b32_e32 v0, 16, v0 699; VI-NEXT: v_cmp_eq_u16_e32 vcc, 0, v2 700; VI-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 701; VI-NEXT: v_lshlrev_b32_e32 v0, 16, v0 702; VI-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 
src1_sel:DWORD 703; VI-NEXT: s_setpc_b64 s[30:31] 704; 705; GFX9-LABEL: v_fshr_v2i16: 706; GFX9: ; %bb.0: 707; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 708; GFX9-NEXT: v_and_b32_e32 v2, 0xf000f, v2 709; GFX9-NEXT: v_pk_sub_i16 v4, 16, v2 op_sel_hi:[0,1] 710; GFX9-NEXT: v_pk_lshlrev_b16 v0, v4, v0 711; GFX9-NEXT: v_pk_lshrrev_b16 v3, v2, v1 712; GFX9-NEXT: v_or_b32_e32 v0, v0, v3 713; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, 0, v2 714; GFX9-NEXT: v_mov_b32_e32 v4, 0 715; GFX9-NEXT: v_cndmask_b32_e32 v3, v0, v1, vcc 716; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 717; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1 718; GFX9-NEXT: v_cmp_eq_u16_sdwa s[4:5], v2, v4 src0_sel:WORD_1 src1_sel:DWORD 719; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5] 720; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v3 721; GFX9-NEXT: v_lshl_or_b32 v0, v0, 16, v1 722; GFX9-NEXT: s_setpc_b64 s[30:31] 723; 724; R600-LABEL: v_fshr_v2i16: 725; R600: ; %bb.0: 726; R600-NEXT: CF_END 727; R600-NEXT: PAD 728 %ret = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %src0, <2 x i16> %src1, <2 x i16> %src2) 729 ret <2 x i16> %ret 730} 731 732define <3 x i16> @v_fshr_v3i16(<3 x i16> %src0, <3 x i16> %src1, <3 x i16> %src2) { 733; SI-LABEL: v_fshr_v3i16: 734; SI: ; %bb.0: 735; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 736; SI-NEXT: s_mov_b32 s4, 0xffff 737; SI-NEXT: v_and_b32_e32 v7, 15, v7 738; SI-NEXT: v_and_b32_e32 v12, s4, v4 739; SI-NEXT: v_sub_i32_e32 v13, vcc, 16, v7 740; SI-NEXT: v_lshr_b32_e32 v12, v12, v7 741; SI-NEXT: v_lshl_b32_e32 v1, v1, v13 742; SI-NEXT: v_or_b32_e32 v1, v1, v12 743; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 744; SI-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 745; SI-NEXT: v_and_b32_e32 v4, 15, v6 746; SI-NEXT: v_sub_i32_e32 v7, vcc, 16, v4 747; SI-NEXT: v_and_b32_e32 v11, s4, v3 748; SI-NEXT: v_lshr_b32_e32 v6, v11, v4 749; SI-NEXT: v_lshl_b32_e32 v0, v0, v7 750; SI-NEXT: v_or_b32_e32 v0, v0, v6 751; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4 752; SI-NEXT: v_mov_b32_e32 v9, 0xffff 753; SI-NEXT: 
v_cndmask_b32_e32 v0, v0, v3, vcc 754; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 755; SI-NEXT: v_and_b32_e32 v0, v9, v0 756; SI-NEXT: v_or_b32_e32 v0, v0, v1 757; SI-NEXT: v_and_b32_e32 v1, 15, v8 758; SI-NEXT: v_sub_i32_e32 v4, vcc, 16, v1 759; SI-NEXT: v_and_b32_e32 v10, s4, v5 760; SI-NEXT: v_lshr_b32_e32 v3, v10, v1 761; SI-NEXT: v_lshl_b32_e32 v2, v2, v4 762; SI-NEXT: v_or_b32_e32 v2, v2, v3 763; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1 764; SI-NEXT: v_cndmask_b32_e32 v1, v2, v5, vcc 765; SI-NEXT: v_and_b32_e32 v2, v9, v1 766; SI-NEXT: v_alignbit_b32 v1, v2, v0, 16 767; SI-NEXT: s_setpc_b64 s[30:31] 768; 769; VI-LABEL: v_fshr_v3i16: 770; VI: ; %bb.0: 771; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 772; VI-NEXT: v_mov_b32_e32 v6, 15 773; VI-NEXT: v_and_b32_sdwa v6, v4, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 774; VI-NEXT: v_lshrrev_b32_e32 v7, 16, v2 775; VI-NEXT: v_lshrrev_b16_e32 v8, v6, v7 776; VI-NEXT: v_sub_u16_e32 v6, 16, v6 777; VI-NEXT: v_lshlrev_b16_sdwa v6, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 778; VI-NEXT: v_or_b32_e32 v6, v6, v8 779; VI-NEXT: v_bfe_u32 v8, v4, 16, 4 780; VI-NEXT: v_cmp_eq_u16_e32 vcc, 0, v8 781; VI-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc 782; VI-NEXT: v_and_b32_e32 v7, 15, v5 783; VI-NEXT: v_lshrrev_b16_e32 v8, v7, v3 784; VI-NEXT: v_sub_u16_e32 v7, 16, v7 785; VI-NEXT: v_lshlrev_b16_e32 v1, v7, v1 786; VI-NEXT: v_and_b32_e32 v5, 15, v5 787; VI-NEXT: v_or_b32_e32 v1, v1, v8 788; VI-NEXT: v_cmp_eq_u16_e32 vcc, 0, v5 789; VI-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 790; VI-NEXT: v_and_b32_e32 v3, 15, v4 791; VI-NEXT: v_lshrrev_b16_e32 v5, v3, v2 792; VI-NEXT: v_sub_u16_e32 v3, 16, v3 793; VI-NEXT: v_lshlrev_b16_e32 v0, v3, v0 794; VI-NEXT: v_and_b32_e32 v3, 0xf000f, v4 795; VI-NEXT: v_or_b32_e32 v0, v0, v5 796; VI-NEXT: v_cmp_eq_u16_e32 vcc, 0, v3 797; VI-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 798; VI-NEXT: v_lshlrev_b32_e32 v2, 16, v6 799; VI-NEXT: v_or_b32_sdwa v0, v0, 
v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 800; VI-NEXT: s_setpc_b64 s[30:31] 801; 802; GFX9-LABEL: v_fshr_v3i16: 803; GFX9: ; %bb.0: 804; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 805; GFX9-NEXT: v_mov_b32_e32 v7, 15 806; GFX9-NEXT: v_and_b32_e32 v6, 15, v4 807; GFX9-NEXT: v_mov_b32_e32 v8, 0xffff 808; GFX9-NEXT: v_and_b32_sdwa v7, v4, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 809; GFX9-NEXT: v_and_b32_e32 v6, v8, v6 810; GFX9-NEXT: v_lshl_or_b32 v6, v7, 16, v6 811; GFX9-NEXT: v_pk_lshrrev_b16 v7, v6, v2 812; GFX9-NEXT: v_pk_sub_i16 v6, 16, v6 op_sel_hi:[0,1] 813; GFX9-NEXT: s_mov_b32 s6, 0xf000f 814; GFX9-NEXT: v_pk_lshlrev_b16 v0, v6, v0 815; GFX9-NEXT: v_and_b32_e32 v4, s6, v4 816; GFX9-NEXT: v_or_b32_e32 v0, v0, v7 817; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, 0, v4 818; GFX9-NEXT: v_mov_b32_e32 v7, 0 819; GFX9-NEXT: v_cndmask_b32_e32 v6, v0, v2, vcc 820; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 821; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2 822; GFX9-NEXT: v_cmp_eq_u16_sdwa s[4:5], v4, v7 src0_sel:WORD_1 src1_sel:DWORD 823; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[4:5] 824; GFX9-NEXT: v_and_b32_e32 v2, 15, v5 825; GFX9-NEXT: v_and_b32_e32 v2, v8, v2 826; GFX9-NEXT: v_pk_lshrrev_b16 v4, v2, v3 827; GFX9-NEXT: v_pk_sub_i16 v2, 16, v2 828; GFX9-NEXT: v_pk_lshlrev_b16 v1, v2, v1 829; GFX9-NEXT: v_and_b32_e32 v2, s6, v5 830; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, 0, v2 831; GFX9-NEXT: v_or_b32_e32 v1, v1, v4 832; GFX9-NEXT: v_and_b32_e32 v2, v8, v6 833; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 834; GFX9-NEXT: v_lshl_or_b32 v0, v0, 16, v2 835; GFX9-NEXT: s_setpc_b64 s[30:31] 836; 837; R600-LABEL: v_fshr_v3i16: 838; R600: ; %bb.0: 839; R600-NEXT: CF_END 840; R600-NEXT: PAD 841 %ret = call <3 x i16> @llvm.fshr.v3i16(<3 x i16> %src0, <3 x i16> %src1, <3 x i16> %src2) 842 ret <3 x i16> %ret 843} 844 ; v_fshr_v4i16 passes its three <4 x i16> arguments to llvm.fshr.v4i16 and returns the result. The SI, VI and GFX9 expectations below mask each shift amount with 15 and finish every lane with a v_cndmask that takes an unshifted input register in place of the or-ed shift result when the lane's shift amount, reduced to its low four bits, compares equal to 0; the R600 expectation is only CF_END and PAD. 845define <4 x i16> @v_fshr_v4i16(<4 x i16> %src0, <4 x i16> %src1, <4 x i16> %src2) { 846; SI-LABEL: v_fshr_v4i16: 
847; SI: ; %bb.0: 848; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 849; SI-NEXT: s_mov_b32 s4, 0xffff 850; SI-NEXT: v_and_b32_e32 v11, 15, v11 851; SI-NEXT: v_and_b32_e32 v16, s4, v7 852; SI-NEXT: v_sub_i32_e32 v17, vcc, 16, v11 853; SI-NEXT: v_lshr_b32_e32 v16, v16, v11 854; SI-NEXT: v_lshl_b32_e32 v3, v3, v17 855; SI-NEXT: v_or_b32_e32 v3, v3, v16 856; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v11 857; SI-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc 858; SI-NEXT: v_and_b32_e32 v7, 15, v10 859; SI-NEXT: v_sub_i32_e32 v11, vcc, 16, v7 860; SI-NEXT: v_and_b32_e32 v15, s4, v6 861; SI-NEXT: v_lshr_b32_e32 v10, v15, v7 862; SI-NEXT: v_lshl_b32_e32 v2, v2, v11 863; SI-NEXT: v_or_b32_e32 v2, v2, v10 864; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 865; SI-NEXT: v_mov_b32_e32 v12, 0xffff 866; SI-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc 867; SI-NEXT: v_lshlrev_b32_e32 v3, 16, v3 868; SI-NEXT: v_and_b32_e32 v2, v12, v2 869; SI-NEXT: v_or_b32_e32 v2, v2, v3 870; SI-NEXT: v_and_b32_e32 v3, 15, v9 871; SI-NEXT: v_sub_i32_e32 v7, vcc, 16, v3 872; SI-NEXT: v_and_b32_e32 v14, s4, v5 873; SI-NEXT: v_lshr_b32_e32 v6, v14, v3 874; SI-NEXT: v_lshl_b32_e32 v1, v1, v7 875; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 876; SI-NEXT: v_or_b32_e32 v1, v1, v6 877; SI-NEXT: v_and_b32_e32 v3, 15, v8 878; SI-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 879; SI-NEXT: v_sub_i32_e32 v6, vcc, 16, v3 880; SI-NEXT: v_and_b32_e32 v13, s4, v4 881; SI-NEXT: v_lshr_b32_e32 v5, v13, v3 882; SI-NEXT: v_lshl_b32_e32 v0, v0, v6 883; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 884; SI-NEXT: v_or_b32_e32 v0, v0, v5 885; SI-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 886; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 887; SI-NEXT: v_and_b32_e32 v0, v12, v0 888; SI-NEXT: v_or_b32_e32 v0, v0, v1 889; SI-NEXT: v_alignbit_b32 v1, v2, v0, 16 890; SI-NEXT: v_lshrrev_b32_e32 v3, 16, v2 891; SI-NEXT: s_setpc_b64 s[30:31] 892; 893; VI-LABEL: v_fshr_v4i16: 894; VI: ; %bb.0: 895; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 896; VI-NEXT: v_mov_b32_e32 v6, 15 897; 
VI-NEXT: v_and_b32_sdwa v7, v5, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 898; VI-NEXT: v_lshrrev_b32_e32 v8, 16, v3 899; VI-NEXT: v_lshrrev_b16_e32 v9, v7, v8 900; VI-NEXT: v_sub_u16_e32 v7, 16, v7 901; VI-NEXT: v_lshlrev_b16_sdwa v7, v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 902; VI-NEXT: v_or_b32_e32 v7, v7, v9 903; VI-NEXT: v_bfe_u32 v9, v5, 16, 4 904; VI-NEXT: v_cmp_eq_u16_e32 vcc, 0, v9 905; VI-NEXT: v_cndmask_b32_e32 v7, v7, v8, vcc 906; VI-NEXT: v_and_b32_sdwa v6, v4, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 907; VI-NEXT: v_lshrrev_b32_e32 v8, 16, v2 908; VI-NEXT: v_lshrrev_b16_e32 v9, v6, v8 909; VI-NEXT: v_sub_u16_e32 v6, 16, v6 910; VI-NEXT: v_lshlrev_b16_sdwa v6, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 911; VI-NEXT: v_or_b32_e32 v6, v6, v9 912; VI-NEXT: v_bfe_u32 v9, v4, 16, 4 913; VI-NEXT: v_cmp_eq_u16_e32 vcc, 0, v9 914; VI-NEXT: v_cndmask_b32_e32 v6, v6, v8, vcc 915; VI-NEXT: v_and_b32_e32 v8, 15, v5 916; VI-NEXT: v_lshrrev_b16_e32 v9, v8, v3 917; VI-NEXT: v_sub_u16_e32 v8, 16, v8 918; VI-NEXT: s_mov_b32 s4, 0xf000f 919; VI-NEXT: v_lshlrev_b16_e32 v1, v8, v1 920; VI-NEXT: v_and_b32_e32 v5, s4, v5 921; VI-NEXT: v_or_b32_e32 v1, v1, v9 922; VI-NEXT: v_cmp_eq_u16_e32 vcc, 0, v5 923; VI-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 924; VI-NEXT: v_and_b32_e32 v3, 15, v4 925; VI-NEXT: v_lshrrev_b16_e32 v5, v3, v2 926; VI-NEXT: v_sub_u16_e32 v3, 16, v3 927; VI-NEXT: v_lshlrev_b16_e32 v0, v3, v0 928; VI-NEXT: v_and_b32_e32 v3, s4, v4 929; VI-NEXT: v_or_b32_e32 v0, v0, v5 930; VI-NEXT: v_cmp_eq_u16_e32 vcc, 0, v3 931; VI-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 932; VI-NEXT: v_lshlrev_b32_e32 v2, 16, v6 933; VI-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 934; VI-NEXT: v_lshlrev_b32_e32 v2, 16, v7 935; VI-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 
src1_sel:DWORD 936; VI-NEXT: s_setpc_b64 s[30:31] 937; 938; GFX9-LABEL: v_fshr_v4i16: 939; GFX9: ; %bb.0: 940; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 941; GFX9-NEXT: v_mov_b32_e32 v7, 15 942; GFX9-NEXT: v_and_b32_e32 v6, 15, v5 943; GFX9-NEXT: v_mov_b32_e32 v9, 0xffff 944; GFX9-NEXT: v_and_b32_sdwa v8, v5, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 945; GFX9-NEXT: v_and_b32_e32 v6, v9, v6 946; GFX9-NEXT: v_lshl_or_b32 v6, v8, 16, v6 947; GFX9-NEXT: v_pk_lshrrev_b16 v8, v6, v3 948; GFX9-NEXT: v_pk_sub_i16 v6, 16, v6 op_sel_hi:[0,1] 949; GFX9-NEXT: s_mov_b32 s6, 0xf000f 950; GFX9-NEXT: v_pk_lshlrev_b16 v1, v6, v1 951; GFX9-NEXT: v_and_b32_e32 v5, s6, v5 952; GFX9-NEXT: v_or_b32_e32 v1, v1, v8 953; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, 0, v5 954; GFX9-NEXT: v_mov_b32_e32 v8, 0 955; GFX9-NEXT: v_cndmask_b32_e32 v6, v1, v3, vcc 956; GFX9-NEXT: v_cmp_eq_u16_sdwa s[4:5], v5, v8 src0_sel:WORD_1 src1_sel:DWORD 957; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v1 958; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3 959; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[4:5] 960; GFX9-NEXT: v_and_b32_e32 v3, 15, v4 961; GFX9-NEXT: v_and_b32_sdwa v5, v4, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 962; GFX9-NEXT: v_and_b32_e32 v3, v9, v3 963; GFX9-NEXT: v_lshl_or_b32 v3, v5, 16, v3 964; GFX9-NEXT: v_pk_lshrrev_b16 v5, v3, v2 965; GFX9-NEXT: v_pk_sub_i16 v3, 16, v3 op_sel_hi:[0,1] 966; GFX9-NEXT: v_pk_lshlrev_b16 v0, v3, v0 967; GFX9-NEXT: v_and_b32_e32 v3, s6, v4 968; GFX9-NEXT: v_or_b32_e32 v0, v0, v5 969; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, 0, v3 970; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v2, vcc 971; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v0 972; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2 973; GFX9-NEXT: v_cmp_eq_u16_sdwa s[4:5], v3, v8 src0_sel:WORD_1 src1_sel:DWORD 974; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[4:5] 975; GFX9-NEXT: v_and_b32_e32 v2, v9, v4 976; GFX9-NEXT: v_lshl_or_b32 v0, v0, 16, v2 977; GFX9-NEXT: v_and_b32_e32 v2, v9, v6 
978; GFX9-NEXT: v_lshl_or_b32 v1, v1, 16, v2 979; GFX9-NEXT: s_setpc_b64 s[30:31] 980; 981; R600-LABEL: v_fshr_v4i16: 982; R600: ; %bb.0: 983; R600-NEXT: CF_END 984; R600-NEXT: PAD 985 %ret = call <4 x i16> @llvm.fshr.v4i16(<4 x i16> %src0, <4 x i16> %src1, <4 x i16> %src2) 986 ret <4 x i16> %ret 987} 988 ; v_fshr_i64 passes its three i64 arguments to llvm.fshr.i64 and returns the result. The SI, VI and GFX9 expectations below mask the amount with 63, shift one 64-bit register pair right by it and the other left by 64 minus it, or the halves together, and use v_cmp_eq_u64 against 0 to drive two v_cndmask instructions that select v2 and v3 instead; the R600 expectation is only CF_END and PAD. 989define i64 @v_fshr_i64(i64 %src0, i64 %src1, i64 %src2) { 990; SI-LABEL: v_fshr_i64: 991; SI: ; %bb.0: 992; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 993; SI-NEXT: v_and_b32_e32 v4, 63, v4 994; SI-NEXT: v_sub_i32_e32 v7, vcc, 64, v4 995; SI-NEXT: v_lshr_b64 v[5:6], v[2:3], v4 996; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], v7 997; SI-NEXT: v_or_b32_e32 v0, v0, v5 998; SI-NEXT: v_mov_b32_e32 v5, 0 999; SI-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5] 1000; SI-NEXT: v_or_b32_e32 v1, v1, v6 1001; SI-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1002; SI-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 1003; SI-NEXT: s_setpc_b64 s[30:31] 1004; 1005; VI-LABEL: v_fshr_i64: 1006; VI: ; %bb.0: 1007; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1008; VI-NEXT: v_and_b32_e32 v4, 63, v4 1009; VI-NEXT: v_sub_u32_e32 v7, vcc, 64, v4 1010; VI-NEXT: v_lshrrev_b64 v[5:6], v4, v[2:3] 1011; VI-NEXT: v_lshlrev_b64 v[0:1], v7, v[0:1] 1012; VI-NEXT: v_or_b32_e32 v0, v0, v5 1013; VI-NEXT: v_mov_b32_e32 v5, 0 1014; VI-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5] 1015; VI-NEXT: v_or_b32_e32 v1, v1, v6 1016; VI-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1017; VI-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 1018; VI-NEXT: s_setpc_b64 s[30:31] 1019; 1020; GFX9-LABEL: v_fshr_i64: 1021; GFX9: ; %bb.0: 1022; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1023; GFX9-NEXT: v_and_b32_e32 v4, 63, v4 1024; GFX9-NEXT: v_sub_u32_e32 v7, 64, v4 1025; GFX9-NEXT: v_lshrrev_b64 v[5:6], v4, v[2:3] 1026; GFX9-NEXT: v_lshlrev_b64 v[0:1], v7, v[0:1] 1027; GFX9-NEXT: v_or_b32_e32 v0, v0, v5 1028; GFX9-NEXT: v_mov_b32_e32 v5, 0 1029; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5] 1030; GFX9-NEXT: v_or_b32_e32 v1, v1, v6 1031; GFX9-NEXT: 
v_cndmask_b32_e32 v0, v0, v2, vcc 1032; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 1033; GFX9-NEXT: s_setpc_b64 s[30:31] 1034; 1035; R600-LABEL: v_fshr_i64: 1036; R600: ; %bb.0: 1037; R600-NEXT: CF_END 1038; R600-NEXT: PAD 1039 %ret = call i64 @llvm.fshr.i64(i64 %src0, i64 %src1, i64 %src2) 1040 ret i64 %ret 1041} 1042 ; v_fshr_v2i64 passes its three <2 x i64> arguments to llvm.fshr.v2i64 and returns the result. The SI, VI and GFX9 expectations below contain the mask-with-63, 64-bit shift pair, or, and v_cmp_eq_u64 plus v_cndmask sequence twice, once for each 64-bit element; the R600 expectation is only CF_END and PAD. 1043define <2 x i64> @v_fshr_v2i64(<2 x i64> %src0, <2 x i64> %src1, <2 x i64> %src2) { 1044; SI-LABEL: v_fshr_v2i64: 1045; SI: ; %bb.0: 1046; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1047; SI-NEXT: v_and_b32_e32 v8, 63, v8 1048; SI-NEXT: v_sub_i32_e32 v9, vcc, 64, v8 1049; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], v9 1050; SI-NEXT: v_lshr_b64 v[11:12], v[4:5], v8 1051; SI-NEXT: v_mov_b32_e32 v9, 0 1052; SI-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9] 1053; SI-NEXT: v_or_b32_e32 v0, v0, v11 1054; SI-NEXT: v_and_b32_e32 v8, 63, v10 1055; SI-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 1056; SI-NEXT: v_sub_i32_e64 v4, s[4:5], 64, v8 1057; SI-NEXT: v_or_b32_e32 v1, v1, v12 1058; SI-NEXT: v_lshr_b64 v[10:11], v[6:7], v8 1059; SI-NEXT: v_lshl_b64 v[2:3], v[2:3], v4 1060; SI-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 1061; SI-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9] 1062; SI-NEXT: v_or_b32_e32 v3, v3, v11 1063; SI-NEXT: v_or_b32_e32 v2, v2, v10 1064; SI-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc 1065; SI-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc 1066; SI-NEXT: s_setpc_b64 s[30:31] 1067; 1068; VI-LABEL: v_fshr_v2i64: 1069; VI: ; %bb.0: 1070; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1071; VI-NEXT: v_and_b32_e32 v8, 63, v8 1072; VI-NEXT: v_sub_u32_e32 v9, vcc, 64, v8 1073; VI-NEXT: v_lshlrev_b64 v[0:1], v9, v[0:1] 1074; VI-NEXT: v_lshrrev_b64 v[11:12], v8, v[4:5] 1075; VI-NEXT: v_mov_b32_e32 v9, 0 1076; VI-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9] 1077; VI-NEXT: v_or_b32_e32 v0, v0, v11 1078; VI-NEXT: v_and_b32_e32 v8, 63, v10 1079; VI-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 1080; VI-NEXT: v_sub_u32_e64 v4, s[4:5], 64, v8 1081; VI-NEXT: v_or_b32_e32 v1, v1, v12 1082; 
VI-NEXT: v_lshrrev_b64 v[10:11], v8, v[6:7] 1083; VI-NEXT: v_lshlrev_b64 v[2:3], v4, v[2:3] 1084; VI-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 1085; VI-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9] 1086; VI-NEXT: v_or_b32_e32 v3, v3, v11 1087; VI-NEXT: v_or_b32_e32 v2, v2, v10 1088; VI-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc 1089; VI-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc 1090; VI-NEXT: s_setpc_b64 s[30:31] 1091; 1092; GFX9-LABEL: v_fshr_v2i64: 1093; GFX9: ; %bb.0: 1094; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1095; GFX9-NEXT: v_and_b32_e32 v8, 63, v8 1096; GFX9-NEXT: v_sub_u32_e32 v9, 64, v8 1097; GFX9-NEXT: v_lshlrev_b64 v[0:1], v9, v[0:1] 1098; GFX9-NEXT: v_lshrrev_b64 v[11:12], v8, v[4:5] 1099; GFX9-NEXT: v_mov_b32_e32 v9, 0 1100; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9] 1101; GFX9-NEXT: v_or_b32_e32 v0, v0, v11 1102; GFX9-NEXT: v_and_b32_e32 v8, 63, v10 1103; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 1104; GFX9-NEXT: v_sub_u32_e32 v4, 64, v8 1105; GFX9-NEXT: v_or_b32_e32 v1, v1, v12 1106; GFX9-NEXT: v_lshrrev_b64 v[10:11], v8, v[6:7] 1107; GFX9-NEXT: v_lshlrev_b64 v[2:3], v4, v[2:3] 1108; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 1109; GFX9-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9] 1110; GFX9-NEXT: v_or_b32_e32 v3, v3, v11 1111; GFX9-NEXT: v_or_b32_e32 v2, v2, v10 1112; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc 1113; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc 1114; GFX9-NEXT: s_setpc_b64 s[30:31] 1115; 1116; R600-LABEL: v_fshr_v2i64: 1117; R600: ; %bb.0: 1118; R600-NEXT: CF_END 1119; R600-NEXT: PAD 1120 %ret = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %src0, <2 x i64> %src1, <2 x i64> %src2) 1121 ret <2 x i64> %ret 1122} 1123 ; v_fshr_i24 passes its three i24 arguments to llvm.fshr.i24 and returns the result. The SI, VI and GFX9 expectations below mask the amount with 0xffffff and then subtract from it 24 times the value of v_mul_hi_u32 by 0xaaaaaaab shifted right by 4, which leaves the amount modulo 24, before the shifts and the final v_cmp_eq_u32 and v_cndmask; the R600 expectation is only CF_END and PAD. 1124define i24 @v_fshr_i24(i24 %src0, i24 %src1, i24 %src2) { 1125; SI-LABEL: v_fshr_i24: 1126; SI: ; %bb.0: 1127; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1128; SI-NEXT: s_mov_b32 s4, 0xffffff 1129; SI-NEXT: v_and_b32_e32 v2, s4, v2 1130; SI-NEXT: s_mov_b32 s5, 0xaaaaaaab 1131; SI-NEXT: v_mul_hi_u32 v3, v2, s5 1132; 
SI-NEXT: v_and_b32_e32 v4, s4, v1 1133; SI-NEXT: v_lshrrev_b32_e32 v3, 4, v3 1134; SI-NEXT: v_mul_lo_u32 v3, v3, 24 1135; SI-NEXT: v_sub_i32_e32 v2, vcc, v2, v3 1136; SI-NEXT: v_lshr_b32_e32 v3, v4, v2 1137; SI-NEXT: v_sub_i32_e32 v4, vcc, 24, v2 1138; SI-NEXT: v_and_b32_e32 v4, s4, v4 1139; SI-NEXT: v_lshl_b32_e32 v0, v0, v4 1140; SI-NEXT: v_or_b32_e32 v0, v0, v3 1141; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 1142; SI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1143; SI-NEXT: s_setpc_b64 s[30:31] 1144; 1145; VI-LABEL: v_fshr_i24: 1146; VI: ; %bb.0: 1147; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1148; VI-NEXT: s_mov_b32 s4, 0xffffff 1149; VI-NEXT: v_and_b32_e32 v2, s4, v2 1150; VI-NEXT: s_mov_b32 s5, 0xaaaaaaab 1151; VI-NEXT: v_mul_hi_u32 v3, v2, s5 1152; VI-NEXT: v_and_b32_e32 v4, s4, v1 1153; VI-NEXT: v_lshrrev_b32_e32 v3, 4, v3 1154; VI-NEXT: v_mul_lo_u32 v3, v3, 24 1155; VI-NEXT: v_sub_u32_e32 v2, vcc, v2, v3 1156; VI-NEXT: v_lshrrev_b32_e32 v3, v2, v4 1157; VI-NEXT: v_sub_u32_e32 v4, vcc, 24, v2 1158; VI-NEXT: v_and_b32_e32 v4, s4, v4 1159; VI-NEXT: v_lshlrev_b32_e32 v0, v4, v0 1160; VI-NEXT: v_or_b32_e32 v0, v0, v3 1161; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 1162; VI-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1163; VI-NEXT: s_setpc_b64 s[30:31] 1164; 1165; GFX9-LABEL: v_fshr_i24: 1166; GFX9: ; %bb.0: 1167; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1168; GFX9-NEXT: s_mov_b32 s4, 0xffffff 1169; GFX9-NEXT: v_and_b32_e32 v2, s4, v2 1170; GFX9-NEXT: s_mov_b32 s5, 0xaaaaaaab 1171; GFX9-NEXT: v_mul_hi_u32 v3, v2, s5 1172; GFX9-NEXT: v_and_b32_e32 v4, s4, v1 1173; GFX9-NEXT: v_lshrrev_b32_e32 v3, 4, v3 1174; GFX9-NEXT: v_mul_lo_u32 v3, v3, 24 1175; GFX9-NEXT: v_sub_u32_e32 v2, v2, v3 1176; GFX9-NEXT: v_lshrrev_b32_e32 v3, v2, v4 1177; GFX9-NEXT: v_sub_u32_e32 v4, 24, v2 1178; GFX9-NEXT: v_and_b32_e32 v4, s4, v4 1179; GFX9-NEXT: v_lshl_or_b32 v0, v0, v4, v3 1180; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 1181; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1182; 
GFX9-NEXT: s_setpc_b64 s[30:31] 1183; 1184; R600-LABEL: v_fshr_i24: 1185; R600: ; %bb.0: 1186; R600-NEXT: CF_END 1187; R600-NEXT: PAD 1188 %ret = call i24 @llvm.fshr.i24(i24 %src0, i24 %src1, i24 %src2) 1189 ret i24 %ret 1190} 1191 ; v_fshr_v2i24 passes its three <2 x i24> arguments to llvm.fshr.v2i24 and returns the result. The SI, VI and GFX9 expectations below first read six dwords with buffer_load_dword at offsets 0 through 20 from s32, reduce both amounts with v_mul_hi_u32 by 0xaaaaaaab, a right shift by 4, v_mul_lo_u32 by 24 and a subtract, and write the two results with buffer_store_short and buffer_store_byte* instructions that use v0 with offen; the R600 expectation is only CF_END and PAD. 1192define <2 x i24> @v_fshr_v2i24(<2 x i24> %src0, <2 x i24> %src1, <2 x i24> %src2) { 1193; SI-LABEL: v_fshr_v2i24: 1194; SI: ; %bb.0: 1195; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1196; SI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 1197; SI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 1198; SI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 1199; SI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12 1200; SI-NEXT: buffer_load_dword v5, off, s[0:3], s32 1201; SI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:4 1202; SI-NEXT: s_mov_b32 s4, 0xffffff 1203; SI-NEXT: s_mov_b32 s5, 0xaaaaaaab 1204; SI-NEXT: v_add_i32_e32 v7, vcc, 3, v0 1205; SI-NEXT: v_add_i32_e32 v8, vcc, 4, v0 1206; SI-NEXT: v_add_i32_e32 v9, vcc, 5, v0 1207; SI-NEXT: v_add_i32_e32 v10, vcc, 2, v0 1208; SI-NEXT: s_waitcnt vmcnt(5) 1209; SI-NEXT: v_and_b32_e32 v14, s4, v1 1210; SI-NEXT: s_waitcnt vmcnt(4) 1211; SI-NEXT: v_and_b32_e32 v2, s4, v2 1212; SI-NEXT: v_mul_hi_u32 v12, v2, s5 1213; SI-NEXT: s_waitcnt vmcnt(3) 1214; SI-NEXT: v_and_b32_e32 v3, s4, v3 1215; SI-NEXT: v_mul_hi_u32 v13, v3, s5 1216; SI-NEXT: s_waitcnt vmcnt(2) 1217; SI-NEXT: v_and_b32_e32 v11, s4, v4 1218; SI-NEXT: v_lshrrev_b32_e32 v12, 4, v12 1219; SI-NEXT: v_mul_lo_u32 v12, v12, 24 1220; SI-NEXT: v_lshrrev_b32_e32 v13, 4, v13 1221; SI-NEXT: v_mul_lo_u32 v13, v13, 24 1222; SI-NEXT: v_sub_i32_e32 v2, vcc, v2, v12 1223; SI-NEXT: v_lshr_b32_e32 v12, v14, v2 1224; SI-NEXT: v_sub_i32_e32 v3, vcc, v3, v13 1225; SI-NEXT: v_sub_i32_e32 v13, vcc, 24, v2 1226; SI-NEXT: v_sub_i32_e32 v14, vcc, 24, v3 1227; SI-NEXT: v_and_b32_e32 v13, s4, v13 1228; SI-NEXT: s_waitcnt vmcnt(1) 1229; SI-NEXT: v_lshl_b32_e32 v5, v5, v13 1230; SI-NEXT: v_and_b32_e32 v14, 0xffffff, v14 1231; 
SI-NEXT: v_lshr_b32_e32 v11, v11, v3 1232; SI-NEXT: s_waitcnt vmcnt(0) 1233; SI-NEXT: v_lshl_b32_e32 v6, v6, v14 1234; SI-NEXT: v_or_b32_e32 v5, v5, v12 1235; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 1236; SI-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc 1237; SI-NEXT: v_or_b32_e32 v6, v6, v11 1238; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 1239; SI-NEXT: v_cndmask_b32_e32 v2, v6, v4, vcc 1240; SI-NEXT: buffer_store_byte v2, v7, s[0:3], 0 offen 1241; SI-NEXT: buffer_store_short v1, v0, s[0:3], 0 offen 1242; SI-NEXT: v_lshrrev_b32_e32 v0, 8, v2 1243; SI-NEXT: s_waitcnt expcnt(1) 1244; SI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 1245; SI-NEXT: s_waitcnt expcnt(0) 1246; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 1247; SI-NEXT: buffer_store_byte v0, v8, s[0:3], 0 offen 1248; SI-NEXT: buffer_store_byte v2, v9, s[0:3], 0 offen 1249; SI-NEXT: buffer_store_byte v1, v10, s[0:3], 0 offen 1250; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1251; SI-NEXT: s_setpc_b64 s[30:31] 1252; 1253; VI-LABEL: v_fshr_v2i24: 1254; VI: ; %bb.0: 1255; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1256; VI-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 1257; VI-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 1258; VI-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 1259; VI-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12 1260; VI-NEXT: buffer_load_dword v5, off, s[0:3], s32 1261; VI-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:4 1262; VI-NEXT: s_mov_b32 s4, 0xffffff 1263; VI-NEXT: s_mov_b32 s5, 0xaaaaaaab 1264; VI-NEXT: v_add_u32_e32 v7, vcc, 3, v0 1265; VI-NEXT: v_add_u32_e32 v8, vcc, 4, v0 1266; VI-NEXT: v_add_u32_e32 v9, vcc, 5, v0 1267; VI-NEXT: v_add_u32_e32 v10, vcc, 2, v0 1268; VI-NEXT: s_waitcnt vmcnt(5) 1269; VI-NEXT: v_and_b32_e32 v14, s4, v1 1270; VI-NEXT: s_waitcnt vmcnt(4) 1271; VI-NEXT: v_and_b32_e32 v2, s4, v2 1272; VI-NEXT: v_mul_hi_u32 v12, v2, s5 1273; VI-NEXT: s_waitcnt vmcnt(3) 1274; VI-NEXT: v_and_b32_e32 v3, s4, v3 1275; VI-NEXT: v_mul_hi_u32 v13, v3, s5 1276; VI-NEXT: 
s_waitcnt vmcnt(2) 1277; VI-NEXT: v_and_b32_e32 v11, s4, v4 1278; VI-NEXT: v_lshrrev_b32_e32 v12, 4, v12 1279; VI-NEXT: v_mul_lo_u32 v12, v12, 24 1280; VI-NEXT: v_lshrrev_b32_e32 v13, 4, v13 1281; VI-NEXT: v_mul_lo_u32 v13, v13, 24 1282; VI-NEXT: v_sub_u32_e32 v2, vcc, v2, v12 1283; VI-NEXT: v_lshrrev_b32_e32 v12, v2, v14 1284; VI-NEXT: v_sub_u32_e32 v3, vcc, v3, v13 1285; VI-NEXT: v_sub_u32_e32 v13, vcc, 24, v2 1286; VI-NEXT: v_sub_u32_e32 v14, vcc, 24, v3 1287; VI-NEXT: v_and_b32_e32 v13, s4, v13 1288; VI-NEXT: s_waitcnt vmcnt(1) 1289; VI-NEXT: v_lshlrev_b32_e32 v5, v13, v5 1290; VI-NEXT: v_and_b32_e32 v14, 0xffffff, v14 1291; VI-NEXT: v_lshrrev_b32_e32 v11, v3, v11 1292; VI-NEXT: s_waitcnt vmcnt(0) 1293; VI-NEXT: v_lshlrev_b32_e32 v6, v14, v6 1294; VI-NEXT: v_or_b32_e32 v5, v5, v12 1295; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 1296; VI-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc 1297; VI-NEXT: v_or_b32_e32 v6, v6, v11 1298; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 1299; VI-NEXT: v_cndmask_b32_e32 v2, v6, v4, vcc 1300; VI-NEXT: buffer_store_byte v2, v7, s[0:3], 0 offen 1301; VI-NEXT: buffer_store_short v1, v0, s[0:3], 0 offen 1302; VI-NEXT: v_lshrrev_b32_e32 v0, 8, v2 1303; VI-NEXT: v_lshrrev_b32_e32 v2, 16, v2 1304; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 1305; VI-NEXT: buffer_store_byte v0, v8, s[0:3], 0 offen 1306; VI-NEXT: buffer_store_byte v2, v9, s[0:3], 0 offen 1307; VI-NEXT: buffer_store_byte v1, v10, s[0:3], 0 offen 1308; VI-NEXT: s_waitcnt vmcnt(0) 1309; VI-NEXT: s_setpc_b64 s[30:31] 1310; 1311; GFX9-LABEL: v_fshr_v2i24: 1312; GFX9: ; %bb.0: 1313; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1314; GFX9-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:8 1315; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 offset:16 1316; GFX9-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:20 1317; GFX9-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:12 1318; GFX9-NEXT: buffer_load_dword v5, off, s[0:3], s32 1319; GFX9-NEXT: buffer_load_dword v8, off, s[0:3], s32 
offset:4 1320; GFX9-NEXT: s_mov_b32 s4, 0xffffff 1321; GFX9-NEXT: s_mov_b32 s5, 0xaaaaaaab 1322; GFX9-NEXT: s_waitcnt vmcnt(5) 1323; GFX9-NEXT: v_and_b32_e32 v10, s4, v1 1324; GFX9-NEXT: s_waitcnt vmcnt(4) 1325; GFX9-NEXT: v_and_b32_e32 v2, s4, v2 1326; GFX9-NEXT: v_mul_hi_u32 v6, v2, s5 1327; GFX9-NEXT: s_waitcnt vmcnt(3) 1328; GFX9-NEXT: v_and_b32_e32 v3, s4, v3 1329; GFX9-NEXT: v_mul_hi_u32 v7, v3, s5 1330; GFX9-NEXT: s_waitcnt vmcnt(2) 1331; GFX9-NEXT: v_and_b32_e32 v9, s4, v4 1332; GFX9-NEXT: v_lshrrev_b32_e32 v6, 4, v6 1333; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 1334; GFX9-NEXT: v_lshrrev_b32_e32 v7, 4, v7 1335; GFX9-NEXT: v_mul_lo_u32 v7, v7, 24 1336; GFX9-NEXT: v_sub_u32_e32 v2, v2, v6 1337; GFX9-NEXT: v_lshrrev_b32_e32 v6, v2, v10 1338; GFX9-NEXT: v_sub_u32_e32 v3, v3, v7 1339; GFX9-NEXT: v_sub_u32_e32 v7, 24, v2 1340; GFX9-NEXT: v_sub_u32_e32 v10, 24, v3 1341; GFX9-NEXT: v_and_b32_e32 v7, s4, v7 1342; GFX9-NEXT: v_lshrrev_b32_e32 v9, v3, v9 1343; GFX9-NEXT: v_and_b32_e32 v10, 0xffffff, v10 1344; GFX9-NEXT: s_waitcnt vmcnt(1) 1345; GFX9-NEXT: v_lshl_or_b32 v5, v5, v7, v6 1346; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2 1347; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc 1348; GFX9-NEXT: s_waitcnt vmcnt(0) 1349; GFX9-NEXT: v_lshl_or_b32 v6, v8, v10, v9 1350; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3 1351; GFX9-NEXT: v_cndmask_b32_e32 v2, v6, v4, vcc 1352; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v2 1353; GFX9-NEXT: buffer_store_byte_d16_hi v2, v0, s[0:3], 0 offen offset:5 1354; GFX9-NEXT: buffer_store_byte v3, v0, s[0:3], 0 offen offset:4 1355; GFX9-NEXT: buffer_store_byte v2, v0, s[0:3], 0 offen offset:3 1356; GFX9-NEXT: buffer_store_byte_d16_hi v1, v0, s[0:3], 0 offen offset:2 1357; GFX9-NEXT: buffer_store_short v1, v0, s[0:3], 0 offen 1358; GFX9-NEXT: s_waitcnt vmcnt(0) 1359; GFX9-NEXT: s_setpc_b64 s[30:31] 1360; 1361; R600-LABEL: v_fshr_v2i24: 1362; R600: ; %bb.0: 1363; R600-NEXT: CF_END 1364; R600-NEXT: PAD 1365 %ret = call <2 x i24> @llvm.fshr.v2i24(<2 x i24> %src0, 
<2 x i24> %src1, <2 x i24> %src2) 1366 ret <2 x i24> %ret 1367} 1368