1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -o - %s | FileCheck -check-prefixes=GCN,GFX6 %s 3; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji -o - %s | FileCheck -check-prefixes=GCN,GFX8 %s 4; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s 5; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -o - %s | FileCheck -check-prefixes=GCN,GFX10 %s 6; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GFX11 %s 7 8define amdgpu_ps i7 @s_fshr_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) { 9; GFX6-LABEL: s_fshr_i7: 10; GFX6: ; %bb.0: 11; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 7 12; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 13; GFX6-NEXT: s_and_b32 s2, s2, 0x7f 14; GFX6-NEXT: s_lshl_b32 s0, s0, 1 15; GFX6-NEXT: s_and_b32 s1, s1, 0x7f 16; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 17; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 18; GFX6-NEXT: v_mul_lo_u32 v1, -7, v0 19; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 20; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 21; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0 22; GFX6-NEXT: v_mul_lo_u32 v0, v0, 7 23; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 24; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 7, v0 25; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 26; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 27; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 7, v0 28; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 29; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 30; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 6, v0 31; GFX6-NEXT: v_and_b32_e32 v0, 0x7f, v0 32; GFX6-NEXT: v_and_b32_e32 v1, 0x7f, v1 33; GFX6-NEXT: v_lshl_b32_e32 v1, s0, v1 34; GFX6-NEXT: v_lshr_b32_e32 v0, s1, v0 35; GFX6-NEXT: v_or_b32_e32 v0, v1, v0 36; GFX6-NEXT: v_readfirstlane_b32 s0, v0 37; GFX6-NEXT: ; return to shader part epilog 38; 39; GFX8-LABEL: s_fshr_i7: 40; GFX8: ; %bb.0: 41; GFX8-NEXT: v_cvt_f32_ubyte0_e32 
v0, 7 42; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 43; GFX8-NEXT: s_and_b32 s2, s2, 0x7f 44; GFX8-NEXT: s_lshl_b32 s0, s0, 1 45; GFX8-NEXT: s_and_b32 s1, s1, 0x7f 46; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 47; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 48; GFX8-NEXT: v_mul_lo_u32 v1, -7, v0 49; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1 50; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1 51; GFX8-NEXT: v_mul_hi_u32 v0, s2, v0 52; GFX8-NEXT: v_mul_lo_u32 v0, v0, 7 53; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s2, v0 54; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 7, v0 55; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 56; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 57; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 7, v0 58; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 59; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 60; GFX8-NEXT: v_sub_u16_e32 v1, 6, v0 61; GFX8-NEXT: v_and_b32_e32 v0, 0x7f, v0 62; GFX8-NEXT: v_and_b32_e32 v1, 0x7f, v1 63; GFX8-NEXT: v_lshlrev_b16_e64 v1, v1, s0 64; GFX8-NEXT: v_lshrrev_b16_e64 v0, v0, s1 65; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 66; GFX8-NEXT: v_readfirstlane_b32 s0, v0 67; GFX8-NEXT: ; return to shader part epilog 68; 69; GFX9-LABEL: s_fshr_i7: 70; GFX9: ; %bb.0: 71; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 7 72; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 73; GFX9-NEXT: s_and_b32 s2, s2, 0x7f 74; GFX9-NEXT: s_lshl_b32 s0, s0, 1 75; GFX9-NEXT: s_and_b32 s1, s1, 0x7f 76; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 77; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 78; GFX9-NEXT: v_mul_lo_u32 v1, -7, v0 79; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 80; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 81; GFX9-NEXT: v_mul_hi_u32 v0, s2, v0 82; GFX9-NEXT: v_mul_lo_u32 v0, v0, 7 83; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0 84; GFX9-NEXT: v_subrev_u32_e32 v1, 7, v0 85; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 86; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 87; GFX9-NEXT: v_subrev_u32_e32 v1, 7, v0 88; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 89; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 90; GFX9-NEXT: v_sub_u16_e32 v1, 6, v0 91; GFX9-NEXT: 
v_and_b32_e32 v0, 0x7f, v0 92; GFX9-NEXT: v_and_b32_e32 v1, 0x7f, v1 93; GFX9-NEXT: v_lshlrev_b16_e64 v1, v1, s0 94; GFX9-NEXT: v_lshrrev_b16_e64 v0, v0, s1 95; GFX9-NEXT: v_or_b32_e32 v0, v1, v0 96; GFX9-NEXT: v_readfirstlane_b32 s0, v0 97; GFX9-NEXT: ; return to shader part epilog 98; 99; GFX10-LABEL: s_fshr_i7: 100; GFX10: ; %bb.0: 101; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 7 102; GFX10-NEXT: s_and_b32 s2, s2, 0x7f 103; GFX10-NEXT: s_lshl_b32 s0, s0, 1 104; GFX10-NEXT: s_and_b32 s1, s1, 0x7f 105; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 106; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 107; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 108; GFX10-NEXT: v_mul_lo_u32 v1, -7, v0 109; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1 110; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1 111; GFX10-NEXT: v_mul_hi_u32 v0, s2, v0 112; GFX10-NEXT: v_mul_lo_u32 v0, v0, 7 113; GFX10-NEXT: v_sub_nc_u32_e32 v0, s2, v0 114; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 7, v0 115; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0 116; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 117; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 7, v0 118; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0 119; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 120; GFX10-NEXT: v_sub_nc_u16 v1, 6, v0 121; GFX10-NEXT: v_and_b32_e32 v0, 0x7f, v0 122; GFX10-NEXT: v_and_b32_e32 v1, 0x7f, v1 123; GFX10-NEXT: v_lshrrev_b16 v0, v0, s1 124; GFX10-NEXT: v_lshlrev_b16 v1, v1, s0 125; GFX10-NEXT: v_or_b32_e32 v0, v1, v0 126; GFX10-NEXT: v_readfirstlane_b32 s0, v0 127; GFX10-NEXT: ; return to shader part epilog 128; 129; GFX11-LABEL: s_fshr_i7: 130; GFX11: ; %bb.0: 131; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, 7 132; GFX11-NEXT: s_and_b32 s2, s2, 0x7f 133; GFX11-NEXT: s_lshl_b32 s0, s0, 1 134; GFX11-NEXT: s_and_b32 s1, s1, 0x7f 135; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) 136; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0 137; GFX11-NEXT: s_waitcnt_depctr 0xfff 138; GFX11-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 139; GFX11-NEXT: 
v_cvt_u32_f32_e32 v0, v0 140; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 141; GFX11-NEXT: v_mul_lo_u32 v1, -7, v0 142; GFX11-NEXT: v_mul_hi_u32 v1, v0, v1 143; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 144; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v1 145; GFX11-NEXT: v_mul_hi_u32 v0, s2, v0 146; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 147; GFX11-NEXT: v_mul_lo_u32 v0, v0, 7 148; GFX11-NEXT: v_sub_nc_u32_e32 v0, s2, v0 149; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 150; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 7, v0 151; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0 152; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 153; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 154; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 7, v0 155; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0 156; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 157; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 158; GFX11-NEXT: v_sub_nc_u16 v1, 6, v0 159; GFX11-NEXT: v_and_b32_e32 v0, 0x7f, v0 160; GFX11-NEXT: v_and_b32_e32 v1, 0x7f, v1 161; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 162; GFX11-NEXT: v_lshrrev_b16 v0, v0, s1 163; GFX11-NEXT: v_lshlrev_b16 v1, v1, s0 164; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 165; GFX11-NEXT: v_or_b32_e32 v0, v1, v0 166; GFX11-NEXT: v_readfirstlane_b32 s0, v0 167; GFX11-NEXT: ; return to shader part epilog 168 %result = call i7 @llvm.fshr.i7(i7 %lhs, i7 %rhs, i7 %amt) 169 ret i7 %result 170} 171 172define i7 @v_fshr_i7(i7 %lhs, i7 %rhs, i7 %amt) { 173; GFX6-LABEL: v_fshr_i7: 174; GFX6: ; %bb.0: 175; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 176; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v3, 7 177; GFX6-NEXT: v_rcp_iflag_f32_e32 v3, v3 178; GFX6-NEXT: v_and_b32_e32 v2, 
0x7f, v2 179; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 180; GFX6-NEXT: v_and_b32_e32 v1, 0x7f, v1 181; GFX6-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 182; GFX6-NEXT: v_cvt_u32_f32_e32 v3, v3 183; GFX6-NEXT: v_mul_lo_u32 v4, -7, v3 184; GFX6-NEXT: v_mul_hi_u32 v4, v3, v4 185; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v4 186; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3 187; GFX6-NEXT: v_mul_lo_u32 v3, v3, 7 188; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3 189; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 7, v2 190; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 191; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 192; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 7, v2 193; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 194; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 195; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 6, v2 196; GFX6-NEXT: v_and_b32_e32 v2, 0x7f, v2 197; GFX6-NEXT: v_and_b32_e32 v3, 0x7f, v3 198; GFX6-NEXT: v_lshlrev_b32_e32 v0, v3, v0 199; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1 200; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 201; GFX6-NEXT: s_setpc_b64 s[30:31] 202; 203; GFX8-LABEL: v_fshr_i7: 204; GFX8: ; %bb.0: 205; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 206; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v3, 7 207; GFX8-NEXT: v_rcp_iflag_f32_e32 v3, v3 208; GFX8-NEXT: v_and_b32_e32 v2, 0x7f, v2 209; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0 210; GFX8-NEXT: v_and_b32_e32 v1, 0x7f, v1 211; GFX8-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 212; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v3 213; GFX8-NEXT: v_mul_lo_u32 v4, -7, v3 214; GFX8-NEXT: v_mul_hi_u32 v4, v3, v4 215; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v4 216; GFX8-NEXT: v_mul_hi_u32 v3, v2, v3 217; GFX8-NEXT: v_mul_lo_u32 v3, v3, 7 218; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v3 219; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 7, v2 220; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 221; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 222; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 7, v2 223; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 224; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 225; GFX8-NEXT: v_sub_u16_e32 
v3, 6, v2 226; GFX8-NEXT: v_and_b32_e32 v2, 0x7f, v2 227; GFX8-NEXT: v_and_b32_e32 v3, 0x7f, v3 228; GFX8-NEXT: v_lshlrev_b16_e32 v0, v3, v0 229; GFX8-NEXT: v_lshrrev_b16_e32 v1, v2, v1 230; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 231; GFX8-NEXT: s_setpc_b64 s[30:31] 232; 233; GFX9-LABEL: v_fshr_i7: 234; GFX9: ; %bb.0: 235; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 236; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v3, 7 237; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v3 238; GFX9-NEXT: v_and_b32_e32 v2, 0x7f, v2 239; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0 240; GFX9-NEXT: v_and_b32_e32 v1, 0x7f, v1 241; GFX9-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 242; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v3 243; GFX9-NEXT: v_mul_lo_u32 v4, -7, v3 244; GFX9-NEXT: v_mul_hi_u32 v4, v3, v4 245; GFX9-NEXT: v_add_u32_e32 v3, v3, v4 246; GFX9-NEXT: v_mul_hi_u32 v3, v2, v3 247; GFX9-NEXT: v_mul_lo_u32 v3, v3, 7 248; GFX9-NEXT: v_sub_u32_e32 v2, v2, v3 249; GFX9-NEXT: v_subrev_u32_e32 v3, 7, v2 250; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 251; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 252; GFX9-NEXT: v_subrev_u32_e32 v3, 7, v2 253; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 254; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 255; GFX9-NEXT: v_sub_u16_e32 v3, 6, v2 256; GFX9-NEXT: v_and_b32_e32 v2, 0x7f, v2 257; GFX9-NEXT: v_and_b32_e32 v3, 0x7f, v3 258; GFX9-NEXT: v_lshlrev_b16_e32 v0, v3, v0 259; GFX9-NEXT: v_lshrrev_b16_e32 v1, v2, v1 260; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 261; GFX9-NEXT: s_setpc_b64 s[30:31] 262; 263; GFX10-LABEL: v_fshr_i7: 264; GFX10: ; %bb.0: 265; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 266; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 267; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v3, 7 268; GFX10-NEXT: v_and_b32_e32 v2, 0x7f, v2 269; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 270; GFX10-NEXT: v_and_b32_e32 v1, 0x7f, v1 271; GFX10-NEXT: v_rcp_iflag_f32_e32 v3, v3 272; GFX10-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 273; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v3 274; GFX10-NEXT: v_mul_lo_u32 v4, -7, v3 275; 
GFX10-NEXT: v_mul_hi_u32 v4, v3, v4 276; GFX10-NEXT: v_add_nc_u32_e32 v3, v3, v4 277; GFX10-NEXT: v_mul_hi_u32 v3, v2, v3 278; GFX10-NEXT: v_mul_lo_u32 v3, v3, 7 279; GFX10-NEXT: v_sub_nc_u32_e32 v2, v2, v3 280; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 7, v2 281; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2 282; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo 283; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 7, v2 284; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2 285; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo 286; GFX10-NEXT: v_sub_nc_u16 v3, 6, v2 287; GFX10-NEXT: v_and_b32_e32 v2, 0x7f, v2 288; GFX10-NEXT: v_and_b32_e32 v3, 0x7f, v3 289; GFX10-NEXT: v_lshrrev_b16 v1, v2, v1 290; GFX10-NEXT: v_lshlrev_b16 v0, v3, v0 291; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 292; GFX10-NEXT: s_setpc_b64 s[30:31] 293; 294; GFX11-LABEL: v_fshr_i7: 295; GFX11: ; %bb.0: 296; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 297; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 298; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v3, 7 299; GFX11-NEXT: v_and_b32_e32 v2, 0x7f, v2 300; GFX11-NEXT: v_lshlrev_b16 v0, 1, v0 301; GFX11-NEXT: v_and_b32_e32 v1, 0x7f, v1 302; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_1) 303; GFX11-NEXT: v_rcp_iflag_f32_e32 v3, v3 304; GFX11-NEXT: s_waitcnt_depctr 0xfff 305; GFX11-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 306; GFX11-NEXT: v_cvt_u32_f32_e32 v3, v3 307; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 308; GFX11-NEXT: v_mul_lo_u32 v4, -7, v3 309; GFX11-NEXT: v_mul_hi_u32 v4, v3, v4 310; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 311; GFX11-NEXT: v_add_nc_u32_e32 v3, v3, v4 312; GFX11-NEXT: v_mul_hi_u32 v3, v2, v3 313; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 314; GFX11-NEXT: v_mul_lo_u32 v3, v3, 7 315; GFX11-NEXT: v_sub_nc_u32_e32 v2, v2, v3 316; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 
317; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 7, v2 318; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2 319; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo 320; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 321; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 7, v2 322; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2 323; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo 324; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 325; GFX11-NEXT: v_sub_nc_u16 v3, 6, v2 326; GFX11-NEXT: v_and_b32_e32 v2, 0x7f, v2 327; GFX11-NEXT: v_and_b32_e32 v3, 0x7f, v3 328; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 329; GFX11-NEXT: v_lshrrev_b16 v1, v2, v1 330; GFX11-NEXT: v_lshlrev_b16 v0, v3, v0 331; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 332; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 333; GFX11-NEXT: s_setpc_b64 s[30:31] 334 %result = call i7 @llvm.fshr.i7(i7 %lhs, i7 %rhs, i7 %amt) 335 ret i7 %result 336} 337 338define amdgpu_ps i8 @s_fshr_i8(i8 inreg %lhs, i8 inreg %rhs, i8 inreg %amt) { 339; GFX6-LABEL: s_fshr_i8: 340; GFX6: ; %bb.0: 341; GFX6-NEXT: s_and_b32 s3, s2, 7 342; GFX6-NEXT: s_andn2_b32 s2, 7, s2 343; GFX6-NEXT: s_lshl_b32 s0, s0, 1 344; GFX6-NEXT: s_and_b32 s1, s1, 0xff 345; GFX6-NEXT: s_lshl_b32 s0, s0, s2 346; GFX6-NEXT: s_lshr_b32 s1, s1, s3 347; GFX6-NEXT: s_or_b32 s0, s0, s1 348; GFX6-NEXT: ; return to shader part epilog 349; 350; GFX8-LABEL: s_fshr_i8: 351; GFX8: ; %bb.0: 352; GFX8-NEXT: s_and_b32 s1, s1, 0xff 353; GFX8-NEXT: s_and_b32 s3, s2, 7 354; GFX8-NEXT: s_andn2_b32 s2, 7, s2 355; GFX8-NEXT: s_lshl_b32 s0, s0, 1 356; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 357; GFX8-NEXT: s_lshl_b32 s0, s0, s2 358; GFX8-NEXT: s_lshr_b32 s1, s1, s3 359; GFX8-NEXT: s_or_b32 s0, s0, s1 360; GFX8-NEXT: ; return to shader part epilog 361; 362; GFX9-LABEL: s_fshr_i8: 363; GFX9: ; %bb.0: 364; GFX9-NEXT: s_and_b32 s1, s1, 0xff 365; GFX9-NEXT: s_and_b32 s3, s2, 7 366; GFX9-NEXT: s_andn2_b32 s2, 
7, s2 367; GFX9-NEXT: s_lshl_b32 s0, s0, 1 368; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 369; GFX9-NEXT: s_lshl_b32 s0, s0, s2 370; GFX9-NEXT: s_lshr_b32 s1, s1, s3 371; GFX9-NEXT: s_or_b32 s0, s0, s1 372; GFX9-NEXT: ; return to shader part epilog 373; 374; GFX10-LABEL: s_fshr_i8: 375; GFX10: ; %bb.0: 376; GFX10-NEXT: s_and_b32 s1, s1, 0xff 377; GFX10-NEXT: s_and_b32 s3, s2, 7 378; GFX10-NEXT: s_andn2_b32 s2, 7, s2 379; GFX10-NEXT: s_lshl_b32 s0, s0, 1 380; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 381; GFX10-NEXT: s_lshl_b32 s0, s0, s2 382; GFX10-NEXT: s_lshr_b32 s1, s1, s3 383; GFX10-NEXT: s_or_b32 s0, s0, s1 384; GFX10-NEXT: ; return to shader part epilog 385; 386; GFX11-LABEL: s_fshr_i8: 387; GFX11: ; %bb.0: 388; GFX11-NEXT: s_and_b32 s1, s1, 0xff 389; GFX11-NEXT: s_and_b32 s3, s2, 7 390; GFX11-NEXT: s_and_not1_b32 s2, 7, s2 391; GFX11-NEXT: s_lshl_b32 s0, s0, 1 392; GFX11-NEXT: s_bfe_u32 s1, s1, 0x100000 393; GFX11-NEXT: s_lshl_b32 s0, s0, s2 394; GFX11-NEXT: s_lshr_b32 s1, s1, s3 395; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 396; GFX11-NEXT: s_or_b32 s0, s0, s1 397; GFX11-NEXT: ; return to shader part epilog 398 %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 %amt) 399 ret i8 %result 400} 401 402define i8 @v_fshr_i8(i8 %lhs, i8 %rhs, i8 %amt) { 403; GFX6-LABEL: v_fshr_i8: 404; GFX6: ; %bb.0: 405; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 406; GFX6-NEXT: v_and_b32_e32 v3, 7, v2 407; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2 408; GFX6-NEXT: v_and_b32_e32 v2, 7, v2 409; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 410; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v1 411; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0 412; GFX6-NEXT: v_lshrrev_b32_e32 v1, v3, v1 413; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 414; GFX6-NEXT: s_setpc_b64 s[30:31] 415; 416; GFX8-LABEL: v_fshr_i8: 417; GFX8: ; %bb.0: 418; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 419; GFX8-NEXT: v_and_b32_e32 v3, 7, v2 420; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2 421; GFX8-NEXT: v_and_b32_e32 v2, 7, v2 422; GFX8-NEXT: 
v_lshlrev_b16_e32 v0, 1, v0 423; GFX8-NEXT: v_lshlrev_b16_e32 v0, v2, v0 424; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 425; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 426; GFX8-NEXT: s_setpc_b64 s[30:31] 427; 428; GFX9-LABEL: v_fshr_i8: 429; GFX9: ; %bb.0: 430; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 431; GFX9-NEXT: v_and_b32_e32 v3, 7, v2 432; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2 433; GFX9-NEXT: v_and_b32_e32 v2, 7, v2 434; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0 435; GFX9-NEXT: v_lshlrev_b16_e32 v0, v2, v0 436; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 437; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 438; GFX9-NEXT: s_setpc_b64 s[30:31] 439; 440; GFX10-LABEL: v_fshr_i8: 441; GFX10: ; %bb.0: 442; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 443; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 444; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2 445; GFX10-NEXT: v_and_b32_e32 v2, 7, v2 446; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 447; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 448; GFX10-NEXT: v_and_b32_e32 v3, 7, v3 449; GFX10-NEXT: v_lshrrev_b16 v1, v2, v1 450; GFX10-NEXT: v_lshlrev_b16 v0, v3, v0 451; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 452; GFX10-NEXT: s_setpc_b64 s[30:31] 453; 454; GFX11-LABEL: v_fshr_i8: 455; GFX11: ; %bb.0: 456; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 457; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 458; GFX11-NEXT: v_xor_b32_e32 v3, -1, v2 459; GFX11-NEXT: v_and_b32_e32 v2, 7, v2 460; GFX11-NEXT: v_lshlrev_b16 v0, 1, v0 461; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 462; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) 463; GFX11-NEXT: v_and_b32_e32 v3, 7, v3 464; GFX11-NEXT: v_lshrrev_b16 v1, v2, v1 465; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 466; GFX11-NEXT: v_lshlrev_b16 v0, v3, v0 467; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 468; GFX11-NEXT: s_setpc_b64 
s[30:31] 469 %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 %amt) 470 ret i8 %result 471} 472 473define amdgpu_ps i8 @s_fshr_i8_4(i8 inreg %lhs, i8 inreg %rhs) { 474; GFX6-LABEL: s_fshr_i8_4: 475; GFX6: ; %bb.0: 476; GFX6-NEXT: s_lshl_b32 s0, s0, 4 477; GFX6-NEXT: s_bfe_u32 s1, s1, 0x40004 478; GFX6-NEXT: s_or_b32 s0, s0, s1 479; GFX6-NEXT: ; return to shader part epilog 480; 481; GFX8-LABEL: s_fshr_i8_4: 482; GFX8: ; %bb.0: 483; GFX8-NEXT: s_and_b32 s1, s1, 0xff 484; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 485; GFX8-NEXT: s_lshl_b32 s0, s0, 4 486; GFX8-NEXT: s_lshr_b32 s1, s1, 4 487; GFX8-NEXT: s_or_b32 s0, s0, s1 488; GFX8-NEXT: ; return to shader part epilog 489; 490; GFX9-LABEL: s_fshr_i8_4: 491; GFX9: ; %bb.0: 492; GFX9-NEXT: s_and_b32 s1, s1, 0xff 493; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 494; GFX9-NEXT: s_lshl_b32 s0, s0, 4 495; GFX9-NEXT: s_lshr_b32 s1, s1, 4 496; GFX9-NEXT: s_or_b32 s0, s0, s1 497; GFX9-NEXT: ; return to shader part epilog 498; 499; GFX10-LABEL: s_fshr_i8_4: 500; GFX10: ; %bb.0: 501; GFX10-NEXT: s_and_b32 s1, s1, 0xff 502; GFX10-NEXT: s_lshl_b32 s0, s0, 4 503; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 504; GFX10-NEXT: s_lshr_b32 s1, s1, 4 505; GFX10-NEXT: s_or_b32 s0, s0, s1 506; GFX10-NEXT: ; return to shader part epilog 507; 508; GFX11-LABEL: s_fshr_i8_4: 509; GFX11: ; %bb.0: 510; GFX11-NEXT: s_and_b32 s1, s1, 0xff 511; GFX11-NEXT: s_lshl_b32 s0, s0, 4 512; GFX11-NEXT: s_bfe_u32 s1, s1, 0x100000 513; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) 514; GFX11-NEXT: s_lshr_b32 s1, s1, 4 515; GFX11-NEXT: s_or_b32 s0, s0, s1 516; GFX11-NEXT: ; return to shader part epilog 517 %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 4) 518 ret i8 %result 519} 520 521define i8 @v_fshr_i8_4(i8 %lhs, i8 %rhs) { 522; GFX6-LABEL: v_fshr_i8_4: 523; GFX6: ; %bb.0: 524; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 525; GFX6-NEXT: v_lshlrev_b32_e32 v0, 4, v0 526; GFX6-NEXT: v_bfe_u32 v1, v1, 4, 4 527; 
GFX6-NEXT: v_or_b32_e32 v0, v0, v1 528; GFX6-NEXT: s_setpc_b64 s[30:31] 529; 530; GFX8-LABEL: v_fshr_i8_4: 531; GFX8: ; %bb.0: 532; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 533; GFX8-NEXT: v_mov_b32_e32 v2, 4 534; GFX8-NEXT: v_lshlrev_b16_e32 v0, 4, v0 535; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 536; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 537; GFX8-NEXT: s_setpc_b64 s[30:31] 538; 539; GFX9-LABEL: v_fshr_i8_4: 540; GFX9: ; %bb.0: 541; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 542; GFX9-NEXT: s_mov_b32 s4, 4 543; GFX9-NEXT: v_lshlrev_b16_e32 v0, 4, v0 544; GFX9-NEXT: v_lshrrev_b16_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 545; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 546; GFX9-NEXT: s_setpc_b64 s[30:31] 547; 548; GFX10-LABEL: v_fshr_i8_4: 549; GFX10: ; %bb.0: 550; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 551; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 552; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 553; GFX10-NEXT: v_lshlrev_b16 v0, 4, v0 554; GFX10-NEXT: v_lshrrev_b16 v1, 4, v1 555; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 556; GFX10-NEXT: s_setpc_b64 s[30:31] 557; 558; GFX11-LABEL: v_fshr_i8_4: 559; GFX11: ; %bb.0: 560; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 561; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 562; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 563; GFX11-NEXT: v_lshlrev_b16 v0, 4, v0 564; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 565; GFX11-NEXT: v_lshrrev_b16 v1, 4, v1 566; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 567; GFX11-NEXT: s_setpc_b64 s[30:31] 568 %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 4) 569 ret i8 %result 570} 571 572define amdgpu_ps i8 @s_fshr_i8_5(i8 inreg %lhs, i8 inreg %rhs) { 573; GFX6-LABEL: s_fshr_i8_5: 574; GFX6: ; %bb.0: 575; GFX6-NEXT: s_lshl_b32 s0, s0, 3 576; GFX6-NEXT: s_bfe_u32 s1, s1, 0x30005 577; GFX6-NEXT: s_or_b32 s0, s0, s1 578; GFX6-NEXT: ; return to shader 
part epilog 579; 580; GFX8-LABEL: s_fshr_i8_5: 581; GFX8: ; %bb.0: 582; GFX8-NEXT: s_and_b32 s1, s1, 0xff 583; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 584; GFX8-NEXT: s_lshl_b32 s0, s0, 3 585; GFX8-NEXT: s_lshr_b32 s1, s1, 5 586; GFX8-NEXT: s_or_b32 s0, s0, s1 587; GFX8-NEXT: ; return to shader part epilog 588; 589; GFX9-LABEL: s_fshr_i8_5: 590; GFX9: ; %bb.0: 591; GFX9-NEXT: s_and_b32 s1, s1, 0xff 592; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 593; GFX9-NEXT: s_lshl_b32 s0, s0, 3 594; GFX9-NEXT: s_lshr_b32 s1, s1, 5 595; GFX9-NEXT: s_or_b32 s0, s0, s1 596; GFX9-NEXT: ; return to shader part epilog 597; 598; GFX10-LABEL: s_fshr_i8_5: 599; GFX10: ; %bb.0: 600; GFX10-NEXT: s_and_b32 s1, s1, 0xff 601; GFX10-NEXT: s_lshl_b32 s0, s0, 3 602; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 603; GFX10-NEXT: s_lshr_b32 s1, s1, 5 604; GFX10-NEXT: s_or_b32 s0, s0, s1 605; GFX10-NEXT: ; return to shader part epilog 606; 607; GFX11-LABEL: s_fshr_i8_5: 608; GFX11: ; %bb.0: 609; GFX11-NEXT: s_and_b32 s1, s1, 0xff 610; GFX11-NEXT: s_lshl_b32 s0, s0, 3 611; GFX11-NEXT: s_bfe_u32 s1, s1, 0x100000 612; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) 613; GFX11-NEXT: s_lshr_b32 s1, s1, 5 614; GFX11-NEXT: s_or_b32 s0, s0, s1 615; GFX11-NEXT: ; return to shader part epilog 616 %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 5) 617 ret i8 %result 618} 619 620define i8 @v_fshr_i8_5(i8 %lhs, i8 %rhs) { 621; GFX6-LABEL: v_fshr_i8_5: 622; GFX6: ; %bb.0: 623; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 624; GFX6-NEXT: v_lshlrev_b32_e32 v0, 3, v0 625; GFX6-NEXT: v_bfe_u32 v1, v1, 5, 3 626; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 627; GFX6-NEXT: s_setpc_b64 s[30:31] 628; 629; GFX8-LABEL: v_fshr_i8_5: 630; GFX8: ; %bb.0: 631; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 632; GFX8-NEXT: v_mov_b32_e32 v2, 5 633; GFX8-NEXT: v_lshlrev_b16_e32 v0, 3, v0 634; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 
635; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 636; GFX8-NEXT: s_setpc_b64 s[30:31] 637; 638; GFX9-LABEL: v_fshr_i8_5: 639; GFX9: ; %bb.0: 640; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 641; GFX9-NEXT: v_mov_b32_e32 v2, 5 642; GFX9-NEXT: v_lshlrev_b16_e32 v0, 3, v0 643; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 644; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 645; GFX9-NEXT: s_setpc_b64 s[30:31] 646; 647; GFX10-LABEL: v_fshr_i8_5: 648; GFX10: ; %bb.0: 649; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 650; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 651; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 652; GFX10-NEXT: v_lshlrev_b16 v0, 3, v0 653; GFX10-NEXT: v_lshrrev_b16 v1, 5, v1 654; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 655; GFX10-NEXT: s_setpc_b64 s[30:31] 656; 657; GFX11-LABEL: v_fshr_i8_5: 658; GFX11: ; %bb.0: 659; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 660; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 661; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 662; GFX11-NEXT: v_lshlrev_b16 v0, 3, v0 663; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 664; GFX11-NEXT: v_lshrrev_b16 v1, 5, v1 665; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 666; GFX11-NEXT: s_setpc_b64 s[30:31] 667 %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 5) 668 ret i8 %result 669} 670 671define amdgpu_ps i16 @s_fshr_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg, i16 inreg %amt.arg) { 672; GFX6-LABEL: s_fshr_v2i8: 673; GFX6: ; %bb.0: 674; GFX6-NEXT: s_lshr_b32 s3, s0, 8 675; GFX6-NEXT: s_lshr_b32 s4, s2, 8 676; GFX6-NEXT: s_and_b32 s5, s2, 7 677; GFX6-NEXT: s_andn2_b32 s2, 7, s2 678; GFX6-NEXT: s_lshl_b32 s0, s0, 1 679; GFX6-NEXT: s_lshl_b32 s0, s0, s2 680; GFX6-NEXT: s_and_b32 s2, s1, 0xff 681; GFX6-NEXT: s_lshr_b32 s2, s2, s5 682; GFX6-NEXT: s_or_b32 s0, s0, s2 683; GFX6-NEXT: s_and_b32 s2, s4, 7 684; GFX6-NEXT: s_andn2_b32 s4, 7, s4 685; GFX6-NEXT: s_lshl_b32 s3, s3, 1 686; GFX6-NEXT: s_bfe_u32 s1, s1, 0x80008 
687; GFX6-NEXT: s_lshl_b32 s3, s3, s4 688; GFX6-NEXT: s_lshr_b32 s1, s1, s2 689; GFX6-NEXT: s_or_b32 s1, s3, s1 690; GFX6-NEXT: s_and_b32 s1, s1, 0xff 691; GFX6-NEXT: s_and_b32 s0, s0, 0xff 692; GFX6-NEXT: s_lshl_b32 s1, s1, 8 693; GFX6-NEXT: s_or_b32 s0, s0, s1 694; GFX6-NEXT: ; return to shader part epilog 695; 696; GFX8-LABEL: s_fshr_v2i8: 697; GFX8: ; %bb.0: 698; GFX8-NEXT: s_lshr_b32 s3, s0, 8 699; GFX8-NEXT: s_lshr_b32 s4, s1, 8 700; GFX8-NEXT: s_lshr_b32 s5, s2, 8 701; GFX8-NEXT: s_and_b32 s6, s2, 7 702; GFX8-NEXT: s_andn2_b32 s2, 7, s2 703; GFX8-NEXT: s_lshl_b32 s0, s0, 1 704; GFX8-NEXT: s_and_b32 s1, s1, 0xff 705; GFX8-NEXT: s_lshl_b32 s0, s0, s2 706; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 707; GFX8-NEXT: s_andn2_b32 s2, 7, s5 708; GFX8-NEXT: s_lshl_b32 s3, s3, 1 709; GFX8-NEXT: s_lshr_b32 s1, s1, s6 710; GFX8-NEXT: s_lshl_b32 s2, s3, s2 711; GFX8-NEXT: s_and_b32 s3, s4, 0xff 712; GFX8-NEXT: s_or_b32 s0, s0, s1 713; GFX8-NEXT: s_and_b32 s1, s5, 7 714; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000 715; GFX8-NEXT: s_lshr_b32 s1, s3, s1 716; GFX8-NEXT: s_or_b32 s1, s2, s1 717; GFX8-NEXT: s_and_b32 s1, s1, 0xff 718; GFX8-NEXT: s_bfe_u32 s2, 8, 0x100000 719; GFX8-NEXT: s_and_b32 s0, s0, 0xff 720; GFX8-NEXT: s_lshl_b32 s1, s1, s2 721; GFX8-NEXT: s_or_b32 s0, s0, s1 722; GFX8-NEXT: ; return to shader part epilog 723; 724; GFX9-LABEL: s_fshr_v2i8: 725; GFX9: ; %bb.0: 726; GFX9-NEXT: s_lshr_b32 s3, s0, 8 727; GFX9-NEXT: s_lshr_b32 s4, s1, 8 728; GFX9-NEXT: s_lshr_b32 s5, s2, 8 729; GFX9-NEXT: s_and_b32 s6, s2, 7 730; GFX9-NEXT: s_andn2_b32 s2, 7, s2 731; GFX9-NEXT: s_lshl_b32 s0, s0, 1 732; GFX9-NEXT: s_and_b32 s1, s1, 0xff 733; GFX9-NEXT: s_lshl_b32 s0, s0, s2 734; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 735; GFX9-NEXT: s_andn2_b32 s2, 7, s5 736; GFX9-NEXT: s_lshl_b32 s3, s3, 1 737; GFX9-NEXT: s_lshr_b32 s1, s1, s6 738; GFX9-NEXT: s_lshl_b32 s2, s3, s2 739; GFX9-NEXT: s_and_b32 s3, s4, 0xff 740; GFX9-NEXT: s_or_b32 s0, s0, s1 741; GFX9-NEXT: s_and_b32 s1, s5, 7 742; 
GFX9-NEXT: s_bfe_u32 s3, s3, 0x100000 743; GFX9-NEXT: s_lshr_b32 s1, s3, s1 744; GFX9-NEXT: s_or_b32 s1, s2, s1 745; GFX9-NEXT: s_and_b32 s1, s1, 0xff 746; GFX9-NEXT: s_bfe_u32 s2, 8, 0x100000 747; GFX9-NEXT: s_and_b32 s0, s0, 0xff 748; GFX9-NEXT: s_lshl_b32 s1, s1, s2 749; GFX9-NEXT: s_or_b32 s0, s0, s1 750; GFX9-NEXT: ; return to shader part epilog 751; 752; GFX10-LABEL: s_fshr_v2i8: 753; GFX10: ; %bb.0: 754; GFX10-NEXT: s_lshr_b32 s4, s1, 8 755; GFX10-NEXT: s_lshr_b32 s3, s0, 8 756; GFX10-NEXT: s_lshr_b32 s5, s2, 8 757; GFX10-NEXT: s_and_b32 s6, s2, 7 758; GFX10-NEXT: s_andn2_b32 s2, 7, s2 759; GFX10-NEXT: s_lshl_b32 s0, s0, 1 760; GFX10-NEXT: s_and_b32 s4, s4, 0xff 761; GFX10-NEXT: s_and_b32 s1, s1, 0xff 762; GFX10-NEXT: s_lshl_b32 s0, s0, s2 763; GFX10-NEXT: s_and_b32 s2, s5, 7 764; GFX10-NEXT: s_andn2_b32 s5, 7, s5 765; GFX10-NEXT: s_lshl_b32 s3, s3, 1 766; GFX10-NEXT: s_bfe_u32 s4, s4, 0x100000 767; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 768; GFX10-NEXT: s_lshl_b32 s3, s3, s5 769; GFX10-NEXT: s_lshr_b32 s2, s4, s2 770; GFX10-NEXT: s_lshr_b32 s1, s1, s6 771; GFX10-NEXT: s_or_b32 s2, s3, s2 772; GFX10-NEXT: s_or_b32 s0, s0, s1 773; GFX10-NEXT: s_and_b32 s1, s2, 0xff 774; GFX10-NEXT: s_bfe_u32 s2, 8, 0x100000 775; GFX10-NEXT: s_and_b32 s0, s0, 0xff 776; GFX10-NEXT: s_lshl_b32 s1, s1, s2 777; GFX10-NEXT: s_or_b32 s0, s0, s1 778; GFX10-NEXT: ; return to shader part epilog 779; 780; GFX11-LABEL: s_fshr_v2i8: 781; GFX11: ; %bb.0: 782; GFX11-NEXT: s_lshr_b32 s4, s1, 8 783; GFX11-NEXT: s_lshr_b32 s3, s0, 8 784; GFX11-NEXT: s_lshr_b32 s5, s2, 8 785; GFX11-NEXT: s_and_b32 s6, s2, 7 786; GFX11-NEXT: s_and_not1_b32 s2, 7, s2 787; GFX11-NEXT: s_lshl_b32 s0, s0, 1 788; GFX11-NEXT: s_and_b32 s4, s4, 0xff 789; GFX11-NEXT: s_and_b32 s1, s1, 0xff 790; GFX11-NEXT: s_lshl_b32 s0, s0, s2 791; GFX11-NEXT: s_and_b32 s2, s5, 7 792; GFX11-NEXT: s_and_not1_b32 s5, 7, s5 793; GFX11-NEXT: s_lshl_b32 s3, s3, 1 794; GFX11-NEXT: s_bfe_u32 s4, s4, 0x100000 795; GFX11-NEXT: s_bfe_u32 s1, s1, 
0x100000 796; GFX11-NEXT: s_lshl_b32 s3, s3, s5 797; GFX11-NEXT: s_lshr_b32 s2, s4, s2 798; GFX11-NEXT: s_lshr_b32 s1, s1, s6 799; GFX11-NEXT: s_or_b32 s2, s3, s2 800; GFX11-NEXT: s_or_b32 s0, s0, s1 801; GFX11-NEXT: s_and_b32 s1, s2, 0xff 802; GFX11-NEXT: s_bfe_u32 s2, 8, 0x100000 803; GFX11-NEXT: s_and_b32 s0, s0, 0xff 804; GFX11-NEXT: s_lshl_b32 s1, s1, s2 805; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 806; GFX11-NEXT: s_or_b32 s0, s0, s1 807; GFX11-NEXT: ; return to shader part epilog 808 %lhs = bitcast i16 %lhs.arg to <2 x i8> 809 %rhs = bitcast i16 %rhs.arg to <2 x i8> 810 %amt = bitcast i16 %amt.arg to <2 x i8> 811 %result = call <2 x i8> @llvm.fshr.v2i8(<2 x i8> %lhs, <2 x i8> %rhs, <2 x i8> %amt) 812 %cast.result = bitcast <2 x i8> %result to i16 813 ret i16 %cast.result 814} 815 816define i16 @v_fshr_v2i8(i16 %lhs.arg, i16 %rhs.arg, i16 %amt.arg) { 817; GFX6-LABEL: v_fshr_v2i8: 818; GFX6: ; %bb.0: 819; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 820; GFX6-NEXT: v_lshrrev_b32_e32 v4, 8, v2 821; GFX6-NEXT: v_and_b32_e32 v5, 7, v2 822; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2 823; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v0 824; GFX6-NEXT: v_and_b32_e32 v2, 7, v2 825; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 826; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0 827; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v1 828; GFX6-NEXT: v_lshrrev_b32_e32 v2, v5, v2 829; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 830; GFX6-NEXT: v_and_b32_e32 v2, 7, v4 831; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4 832; GFX6-NEXT: v_and_b32_e32 v4, 7, v4 833; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3 834; GFX6-NEXT: v_bfe_u32 v1, v1, 8, 8 835; GFX6-NEXT: v_lshlrev_b32_e32 v3, v4, v3 836; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1 837; GFX6-NEXT: v_or_b32_e32 v1, v3, v1 838; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v1 839; GFX6-NEXT: v_and_b32_e32 v0, 0xff, v0 840; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1 841; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 842; GFX6-NEXT: s_setpc_b64 s[30:31] 843; 844; GFX8-LABEL: v_fshr_v2i8: 845; GFX8: ; 
%bb.0: 846; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 847; GFX8-NEXT: v_lshrrev_b32_e32 v5, 8, v2 848; GFX8-NEXT: v_and_b32_e32 v6, 7, v2 849; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2 850; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 851; GFX8-NEXT: v_and_b32_e32 v2, 7, v2 852; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0 853; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v1 854; GFX8-NEXT: v_lshlrev_b16_e32 v0, v2, v0 855; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 856; GFX8-NEXT: v_xor_b32_e32 v2, -1, v5 857; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 858; GFX8-NEXT: v_and_b32_e32 v1, 7, v5 859; GFX8-NEXT: v_and_b32_e32 v2, 7, v2 860; GFX8-NEXT: v_lshlrev_b16_e32 v3, 1, v3 861; GFX8-NEXT: v_lshlrev_b16_e32 v2, v2, v3 862; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 863; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 864; GFX8-NEXT: v_and_b32_e32 v1, 0xff, v1 865; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1 866; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 867; GFX8-NEXT: s_setpc_b64 s[30:31] 868; 869; GFX9-LABEL: v_fshr_v2i8: 870; GFX9: ; %bb.0: 871; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 872; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v2 873; GFX9-NEXT: v_and_b32_e32 v6, 7, v2 874; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2 875; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v0 876; GFX9-NEXT: v_and_b32_e32 v2, 7, v2 877; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0 878; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1 879; GFX9-NEXT: v_lshlrev_b16_e32 v0, v2, v0 880; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 881; GFX9-NEXT: v_xor_b32_e32 v2, -1, v5 882; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 883; GFX9-NEXT: v_and_b32_e32 v1, 7, v5 884; GFX9-NEXT: v_and_b32_e32 v2, 7, v2 885; GFX9-NEXT: v_lshlrev_b16_e32 v3, 1, v3 886; GFX9-NEXT: v_lshlrev_b16_e32 v2, v2, v3 887; GFX9-NEXT: 
v_lshrrev_b16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 888; GFX9-NEXT: v_or_b32_e32 v1, v2, v1 889; GFX9-NEXT: v_and_b32_e32 v1, 0xff, v1 890; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1 891; GFX9-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 892; GFX9-NEXT: s_setpc_b64 s[30:31] 893; 894; GFX10-LABEL: v_fshr_v2i8: 895; GFX10: ; %bb.0: 896; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 897; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 898; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v2 899; GFX10-NEXT: v_lshrrev_b32_e32 v4, 8, v0 900; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v1 901; GFX10-NEXT: v_and_b32_e32 v7, 7, v2 902; GFX10-NEXT: v_xor_b32_e32 v2, -1, v2 903; GFX10-NEXT: v_xor_b32_e32 v6, -1, v3 904; GFX10-NEXT: v_and_b32_e32 v3, 7, v3 905; GFX10-NEXT: v_lshlrev_b16 v4, 1, v4 906; GFX10-NEXT: v_and_b32_e32 v5, 0xff, v5 907; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 908; GFX10-NEXT: v_and_b32_e32 v6, 7, v6 909; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 910; GFX10-NEXT: v_and_b32_e32 v2, 7, v2 911; GFX10-NEXT: v_lshrrev_b16 v3, v3, v5 912; GFX10-NEXT: s_movk_i32 s4, 0xff 913; GFX10-NEXT: v_lshlrev_b16 v4, v6, v4 914; GFX10-NEXT: v_lshrrev_b16 v1, v7, v1 915; GFX10-NEXT: v_lshlrev_b16 v0, v2, v0 916; GFX10-NEXT: v_or_b32_e32 v2, v4, v3 917; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 918; GFX10-NEXT: v_and_b32_sdwa v1, v2, s4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 919; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 920; GFX10-NEXT: s_setpc_b64 s[30:31] 921; 922; GFX11-LABEL: v_fshr_v2i8: 923; GFX11: ; %bb.0: 924; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 925; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 926; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v2 927; GFX11-NEXT: v_lshrrev_b32_e32 v4, 8, v0 928; GFX11-NEXT: v_lshrrev_b32_e32 v5, 8, v1 929; GFX11-NEXT: v_and_b32_e32 v7, 7, v2 930; GFX11-NEXT: v_xor_b32_e32 v2, -1, v2 
931; GFX11-NEXT: v_xor_b32_e32 v6, -1, v3 932; GFX11-NEXT: v_and_b32_e32 v3, 7, v3 933; GFX11-NEXT: v_lshlrev_b16 v4, 1, v4 934; GFX11-NEXT: v_and_b32_e32 v5, 0xff, v5 935; GFX11-NEXT: v_lshlrev_b16 v0, 1, v0 936; GFX11-NEXT: v_and_b32_e32 v6, 7, v6 937; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 938; GFX11-NEXT: v_and_b32_e32 v2, 7, v2 939; GFX11-NEXT: v_lshrrev_b16 v3, v3, v5 940; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 941; GFX11-NEXT: v_lshlrev_b16 v4, v6, v4 942; GFX11-NEXT: v_lshrrev_b16 v1, v7, v1 943; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) 944; GFX11-NEXT: v_lshlrev_b16 v0, v2, v0 945; GFX11-NEXT: v_or_b32_e32 v2, v4, v3 946; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 947; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 948; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v2 949; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 950; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 951; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1 952; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 953; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 954; GFX11-NEXT: s_setpc_b64 s[30:31] 955 %lhs = bitcast i16 %lhs.arg to <2 x i8> 956 %rhs = bitcast i16 %rhs.arg to <2 x i8> 957 %amt = bitcast i16 %amt.arg to <2 x i8> 958 %result = call <2 x i8> @llvm.fshr.v2i8(<2 x i8> %lhs, <2 x i8> %rhs, <2 x i8> %amt) 959 %cast.result = bitcast <2 x i8> %result to i16 960 ret i16 %cast.result 961} 962 ; s_fshr_v4i8: funnel shift right on a four-element i8 vector, in an amdgpu_ps function whose three i32 arguments are all marked inreg. Each i32 argument is bitcast to a four-element i8 vector, the three vectors are passed to @llvm.fshr.v4i8, and the result is bitcast back to i32 and returned. The expected output for every target consists of scalar (s_*) instructions only. The per-target assertions that follow are autogenerated by utils/update_llc_test_checks.py; regenerate them instead of editing them by hand. 963define amdgpu_ps i32 @s_fshr_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg, i32 inreg %amt.arg) { 964; GFX6-LABEL: s_fshr_v4i8: 965; GFX6: ; %bb.0: 966; GFX6-NEXT: s_lshr_b32 s3, s0, 8 967; GFX6-NEXT: s_lshr_b32 s4, s0, 16 968; GFX6-NEXT: s_lshr_b32 s5, s0, 24 969; GFX6-NEXT: s_lshr_b32 s7, s2, 8 970; GFX6-NEXT: s_lshr_b32 s8, s2, 16 971; GFX6-NEXT: s_lshr_b32 s9, s2, 24 972; GFX6-NEXT: s_and_b32 s10, s2, 7 973; GFX6-NEXT: s_andn2_b32 s2, 7, s2 974; GFX6-NEXT: s_lshl_b32 s0, s0, 1 975;
GFX6-NEXT: s_lshl_b32 s0, s0, s2 976; GFX6-NEXT: s_and_b32 s2, s1, 0xff 977; GFX6-NEXT: s_lshr_b32 s2, s2, s10 978; GFX6-NEXT: s_or_b32 s0, s0, s2 979; GFX6-NEXT: s_and_b32 s2, s7, 7 980; GFX6-NEXT: s_andn2_b32 s7, 7, s7 981; GFX6-NEXT: s_lshl_b32 s3, s3, 1 982; GFX6-NEXT: s_lshl_b32 s3, s3, s7 983; GFX6-NEXT: s_bfe_u32 s7, s1, 0x80008 984; GFX6-NEXT: s_lshr_b32 s2, s7, s2 985; GFX6-NEXT: s_lshr_b32 s6, s1, 24 986; GFX6-NEXT: s_or_b32 s2, s3, s2 987; GFX6-NEXT: s_and_b32 s3, s8, 7 988; GFX6-NEXT: s_andn2_b32 s7, 7, s8 989; GFX6-NEXT: s_lshl_b32 s4, s4, 1 990; GFX6-NEXT: s_bfe_u32 s1, s1, 0x80010 991; GFX6-NEXT: s_lshl_b32 s4, s4, s7 992; GFX6-NEXT: s_lshr_b32 s1, s1, s3 993; GFX6-NEXT: s_or_b32 s1, s4, s1 994; GFX6-NEXT: s_and_b32 s3, s9, 7 995; GFX6-NEXT: s_andn2_b32 s4, 7, s9 996; GFX6-NEXT: s_lshl_b32 s5, s5, 1 997; GFX6-NEXT: s_and_b32 s2, s2, 0xff 998; GFX6-NEXT: s_lshl_b32 s4, s5, s4 999; GFX6-NEXT: s_lshr_b32 s3, s6, s3 1000; GFX6-NEXT: s_and_b32 s0, s0, 0xff 1001; GFX6-NEXT: s_lshl_b32 s2, s2, 8 1002; GFX6-NEXT: s_and_b32 s1, s1, 0xff 1003; GFX6-NEXT: s_or_b32 s3, s4, s3 1004; GFX6-NEXT: s_or_b32 s0, s0, s2 1005; GFX6-NEXT: s_lshl_b32 s1, s1, 16 1006; GFX6-NEXT: s_or_b32 s0, s0, s1 1007; GFX6-NEXT: s_and_b32 s1, s3, 0xff 1008; GFX6-NEXT: s_lshl_b32 s1, s1, 24 1009; GFX6-NEXT: s_or_b32 s0, s0, s1 1010; GFX6-NEXT: ; return to shader part epilog 1011; 1012; GFX8-LABEL: s_fshr_v4i8: 1013; GFX8: ; %bb.0: 1014; GFX8-NEXT: s_lshr_b32 s3, s0, 8 1015; GFX8-NEXT: s_lshr_b32 s4, s0, 16 1016; GFX8-NEXT: s_lshr_b32 s5, s0, 24 1017; GFX8-NEXT: s_lshr_b32 s6, s1, 8 1018; GFX8-NEXT: s_lshr_b32 s7, s1, 16 1019; GFX8-NEXT: s_lshr_b32 s8, s1, 24 1020; GFX8-NEXT: s_lshr_b32 s9, s2, 8 1021; GFX8-NEXT: s_lshr_b32 s10, s2, 16 1022; GFX8-NEXT: s_lshr_b32 s11, s2, 24 1023; GFX8-NEXT: s_and_b32 s12, s2, 7 1024; GFX8-NEXT: s_andn2_b32 s2, 7, s2 1025; GFX8-NEXT: s_lshl_b32 s0, s0, 1 1026; GFX8-NEXT: s_and_b32 s1, s1, 0xff 1027; GFX8-NEXT: s_lshl_b32 s0, s0, s2 1028; GFX8-NEXT: 
s_bfe_u32 s1, s1, 0x100000 1029; GFX8-NEXT: s_andn2_b32 s2, 7, s9 1030; GFX8-NEXT: s_lshl_b32 s3, s3, 1 1031; GFX8-NEXT: s_lshr_b32 s1, s1, s12 1032; GFX8-NEXT: s_lshl_b32 s2, s3, s2 1033; GFX8-NEXT: s_and_b32 s3, s6, 0xff 1034; GFX8-NEXT: s_or_b32 s0, s0, s1 1035; GFX8-NEXT: s_and_b32 s1, s9, 7 1036; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000 1037; GFX8-NEXT: s_lshr_b32 s1, s3, s1 1038; GFX8-NEXT: s_andn2_b32 s3, 7, s10 1039; GFX8-NEXT: s_lshl_b32 s4, s4, 1 1040; GFX8-NEXT: s_lshl_b32 s3, s4, s3 1041; GFX8-NEXT: s_and_b32 s4, s7, 0xff 1042; GFX8-NEXT: s_or_b32 s1, s2, s1 1043; GFX8-NEXT: s_and_b32 s2, s10, 7 1044; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000 1045; GFX8-NEXT: s_lshr_b32 s2, s4, s2 1046; GFX8-NEXT: s_and_b32 s1, s1, 0xff 1047; GFX8-NEXT: s_or_b32 s2, s3, s2 1048; GFX8-NEXT: s_and_b32 s3, s11, 7 1049; GFX8-NEXT: s_andn2_b32 s4, 7, s11 1050; GFX8-NEXT: s_lshl_b32 s5, s5, 1 1051; GFX8-NEXT: s_and_b32 s0, s0, 0xff 1052; GFX8-NEXT: s_lshl_b32 s1, s1, 8 1053; GFX8-NEXT: s_lshl_b32 s4, s5, s4 1054; GFX8-NEXT: s_lshr_b32 s3, s8, s3 1055; GFX8-NEXT: s_or_b32 s0, s0, s1 1056; GFX8-NEXT: s_and_b32 s1, s2, 0xff 1057; GFX8-NEXT: s_or_b32 s3, s4, s3 1058; GFX8-NEXT: s_lshl_b32 s1, s1, 16 1059; GFX8-NEXT: s_or_b32 s0, s0, s1 1060; GFX8-NEXT: s_and_b32 s1, s3, 0xff 1061; GFX8-NEXT: s_lshl_b32 s1, s1, 24 1062; GFX8-NEXT: s_or_b32 s0, s0, s1 1063; GFX8-NEXT: ; return to shader part epilog 1064; 1065; GFX9-LABEL: s_fshr_v4i8: 1066; GFX9: ; %bb.0: 1067; GFX9-NEXT: s_lshr_b32 s3, s0, 8 1068; GFX9-NEXT: s_lshr_b32 s4, s0, 16 1069; GFX9-NEXT: s_lshr_b32 s5, s0, 24 1070; GFX9-NEXT: s_lshr_b32 s6, s1, 8 1071; GFX9-NEXT: s_lshr_b32 s7, s1, 16 1072; GFX9-NEXT: s_lshr_b32 s8, s1, 24 1073; GFX9-NEXT: s_lshr_b32 s9, s2, 8 1074; GFX9-NEXT: s_lshr_b32 s10, s2, 16 1075; GFX9-NEXT: s_lshr_b32 s11, s2, 24 1076; GFX9-NEXT: s_and_b32 s12, s2, 7 1077; GFX9-NEXT: s_andn2_b32 s2, 7, s2 1078; GFX9-NEXT: s_lshl_b32 s0, s0, 1 1079; GFX9-NEXT: s_and_b32 s1, s1, 0xff 1080; GFX9-NEXT: s_lshl_b32 s0, s0, s2 
1081; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 1082; GFX9-NEXT: s_andn2_b32 s2, 7, s9 1083; GFX9-NEXT: s_lshl_b32 s3, s3, 1 1084; GFX9-NEXT: s_lshr_b32 s1, s1, s12 1085; GFX9-NEXT: s_lshl_b32 s2, s3, s2 1086; GFX9-NEXT: s_and_b32 s3, s6, 0xff 1087; GFX9-NEXT: s_or_b32 s0, s0, s1 1088; GFX9-NEXT: s_and_b32 s1, s9, 7 1089; GFX9-NEXT: s_bfe_u32 s3, s3, 0x100000 1090; GFX9-NEXT: s_lshr_b32 s1, s3, s1 1091; GFX9-NEXT: s_andn2_b32 s3, 7, s10 1092; GFX9-NEXT: s_lshl_b32 s4, s4, 1 1093; GFX9-NEXT: s_lshl_b32 s3, s4, s3 1094; GFX9-NEXT: s_and_b32 s4, s7, 0xff 1095; GFX9-NEXT: s_or_b32 s1, s2, s1 1096; GFX9-NEXT: s_and_b32 s2, s10, 7 1097; GFX9-NEXT: s_bfe_u32 s4, s4, 0x100000 1098; GFX9-NEXT: s_lshr_b32 s2, s4, s2 1099; GFX9-NEXT: s_and_b32 s1, s1, 0xff 1100; GFX9-NEXT: s_or_b32 s2, s3, s2 1101; GFX9-NEXT: s_and_b32 s3, s11, 7 1102; GFX9-NEXT: s_andn2_b32 s4, 7, s11 1103; GFX9-NEXT: s_lshl_b32 s5, s5, 1 1104; GFX9-NEXT: s_and_b32 s0, s0, 0xff 1105; GFX9-NEXT: s_lshl_b32 s1, s1, 8 1106; GFX9-NEXT: s_lshl_b32 s4, s5, s4 1107; GFX9-NEXT: s_lshr_b32 s3, s8, s3 1108; GFX9-NEXT: s_or_b32 s0, s0, s1 1109; GFX9-NEXT: s_and_b32 s1, s2, 0xff 1110; GFX9-NEXT: s_or_b32 s3, s4, s3 1111; GFX9-NEXT: s_lshl_b32 s1, s1, 16 1112; GFX9-NEXT: s_or_b32 s0, s0, s1 1113; GFX9-NEXT: s_and_b32 s1, s3, 0xff 1114; GFX9-NEXT: s_lshl_b32 s1, s1, 24 1115; GFX9-NEXT: s_or_b32 s0, s0, s1 1116; GFX9-NEXT: ; return to shader part epilog 1117; 1118; GFX10-LABEL: s_fshr_v4i8: 1119; GFX10: ; %bb.0: 1120; GFX10-NEXT: s_lshr_b32 s6, s1, 8 1121; GFX10-NEXT: s_lshr_b32 s3, s0, 8 1122; GFX10-NEXT: s_lshr_b32 s4, s0, 16 1123; GFX10-NEXT: s_lshr_b32 s5, s0, 24 1124; GFX10-NEXT: s_lshr_b32 s7, s1, 16 1125; GFX10-NEXT: s_lshr_b32 s8, s1, 24 1126; GFX10-NEXT: s_lshr_b32 s9, s2, 8 1127; GFX10-NEXT: s_lshr_b32 s10, s2, 16 1128; GFX10-NEXT: s_lshr_b32 s11, s2, 24 1129; GFX10-NEXT: s_and_b32 s12, s2, 7 1130; GFX10-NEXT: s_andn2_b32 s2, 7, s2 1131; GFX10-NEXT: s_and_b32 s1, s1, 0xff 1132; GFX10-NEXT: s_lshl_b32 s0, s0, 1 1133; 
GFX10-NEXT: s_and_b32 s6, s6, 0xff 1134; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 1135; GFX10-NEXT: s_lshl_b32 s0, s0, s2 1136; GFX10-NEXT: s_and_b32 s2, s9, 7 1137; GFX10-NEXT: s_andn2_b32 s9, 7, s9 1138; GFX10-NEXT: s_lshl_b32 s3, s3, 1 1139; GFX10-NEXT: s_bfe_u32 s6, s6, 0x100000 1140; GFX10-NEXT: s_lshr_b32 s1, s1, s12 1141; GFX10-NEXT: s_lshl_b32 s3, s3, s9 1142; GFX10-NEXT: s_lshr_b32 s2, s6, s2 1143; GFX10-NEXT: s_and_b32 s6, s7, 0xff 1144; GFX10-NEXT: s_or_b32 s0, s0, s1 1145; GFX10-NEXT: s_or_b32 s1, s3, s2 1146; GFX10-NEXT: s_and_b32 s2, s10, 7 1147; GFX10-NEXT: s_andn2_b32 s3, 7, s10 1148; GFX10-NEXT: s_lshl_b32 s4, s4, 1 1149; GFX10-NEXT: s_bfe_u32 s6, s6, 0x100000 1150; GFX10-NEXT: s_lshl_b32 s3, s4, s3 1151; GFX10-NEXT: s_lshr_b32 s2, s6, s2 1152; GFX10-NEXT: s_andn2_b32 s4, 7, s11 1153; GFX10-NEXT: s_lshl_b32 s5, s5, 1 1154; GFX10-NEXT: s_and_b32 s6, s11, 7 1155; GFX10-NEXT: s_lshl_b32 s4, s5, s4 1156; GFX10-NEXT: s_lshr_b32 s5, s8, s6 1157; GFX10-NEXT: s_or_b32 s2, s3, s2 1158; GFX10-NEXT: s_and_b32 s1, s1, 0xff 1159; GFX10-NEXT: s_or_b32 s3, s4, s5 1160; GFX10-NEXT: s_and_b32 s0, s0, 0xff 1161; GFX10-NEXT: s_lshl_b32 s1, s1, 8 1162; GFX10-NEXT: s_and_b32 s2, s2, 0xff 1163; GFX10-NEXT: s_or_b32 s0, s0, s1 1164; GFX10-NEXT: s_lshl_b32 s1, s2, 16 1165; GFX10-NEXT: s_and_b32 s2, s3, 0xff 1166; GFX10-NEXT: s_or_b32 s0, s0, s1 1167; GFX10-NEXT: s_lshl_b32 s1, s2, 24 1168; GFX10-NEXT: s_or_b32 s0, s0, s1 1169; GFX10-NEXT: ; return to shader part epilog 1170; 1171; GFX11-LABEL: s_fshr_v4i8: 1172; GFX11: ; %bb.0: 1173; GFX11-NEXT: s_lshr_b32 s6, s1, 8 1174; GFX11-NEXT: s_lshr_b32 s3, s0, 8 1175; GFX11-NEXT: s_lshr_b32 s4, s0, 16 1176; GFX11-NEXT: s_lshr_b32 s5, s0, 24 1177; GFX11-NEXT: s_lshr_b32 s7, s1, 16 1178; GFX11-NEXT: s_lshr_b32 s8, s1, 24 1179; GFX11-NEXT: s_lshr_b32 s9, s2, 8 1180; GFX11-NEXT: s_lshr_b32 s10, s2, 16 1181; GFX11-NEXT: s_lshr_b32 s11, s2, 24 1182; GFX11-NEXT: s_and_b32 s12, s2, 7 1183; GFX11-NEXT: s_and_not1_b32 s2, 7, s2 1184; 
GFX11-NEXT: s_and_b32 s1, s1, 0xff 1185; GFX11-NEXT: s_lshl_b32 s0, s0, 1 1186; GFX11-NEXT: s_and_b32 s6, s6, 0xff 1187; GFX11-NEXT: s_bfe_u32 s1, s1, 0x100000 1188; GFX11-NEXT: s_lshl_b32 s0, s0, s2 1189; GFX11-NEXT: s_and_b32 s2, s9, 7 1190; GFX11-NEXT: s_and_not1_b32 s9, 7, s9 1191; GFX11-NEXT: s_lshl_b32 s3, s3, 1 1192; GFX11-NEXT: s_bfe_u32 s6, s6, 0x100000 1193; GFX11-NEXT: s_lshr_b32 s1, s1, s12 1194; GFX11-NEXT: s_lshl_b32 s3, s3, s9 1195; GFX11-NEXT: s_lshr_b32 s2, s6, s2 1196; GFX11-NEXT: s_and_b32 s6, s7, 0xff 1197; GFX11-NEXT: s_or_b32 s0, s0, s1 1198; GFX11-NEXT: s_or_b32 s1, s3, s2 1199; GFX11-NEXT: s_and_b32 s2, s10, 7 1200; GFX11-NEXT: s_and_not1_b32 s3, 7, s10 1201; GFX11-NEXT: s_lshl_b32 s4, s4, 1 1202; GFX11-NEXT: s_bfe_u32 s6, s6, 0x100000 1203; GFX11-NEXT: s_lshl_b32 s3, s4, s3 1204; GFX11-NEXT: s_lshr_b32 s2, s6, s2 1205; GFX11-NEXT: s_and_not1_b32 s4, 7, s11 1206; GFX11-NEXT: s_lshl_b32 s5, s5, 1 1207; GFX11-NEXT: s_and_b32 s6, s11, 7 1208; GFX11-NEXT: s_lshl_b32 s4, s5, s4 1209; GFX11-NEXT: s_lshr_b32 s5, s8, s6 1210; GFX11-NEXT: s_or_b32 s2, s3, s2 1211; GFX11-NEXT: s_and_b32 s1, s1, 0xff 1212; GFX11-NEXT: s_or_b32 s3, s4, s5 1213; GFX11-NEXT: s_and_b32 s0, s0, 0xff 1214; GFX11-NEXT: s_lshl_b32 s1, s1, 8 1215; GFX11-NEXT: s_and_b32 s2, s2, 0xff 1216; GFX11-NEXT: s_or_b32 s0, s0, s1 1217; GFX11-NEXT: s_lshl_b32 s1, s2, 16 1218; GFX11-NEXT: s_and_b32 s2, s3, 0xff 1219; GFX11-NEXT: s_or_b32 s0, s0, s1 1220; GFX11-NEXT: s_lshl_b32 s1, s2, 24 1221; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1222; GFX11-NEXT: s_or_b32 s0, s0, s1 1223; GFX11-NEXT: ; return to shader part epilog 1224 %lhs = bitcast i32 %lhs.arg to <4 x i8> 1225 %rhs = bitcast i32 %rhs.arg to <4 x i8> 1226 %amt = bitcast i32 %amt.arg to <4 x i8> 1227 %result = call <4 x i8> @llvm.fshr.v4i8(<4 x i8> %lhs, <4 x i8> %rhs, <4 x i8> %amt) 1228 %cast.result = bitcast <4 x i8> %result to i32 1229 ret i32 %cast.result 1230} 1231 ; v_fshr_v4i8: funnel shift right on a four-element i8 vector, in a function with the default calling convention and no inreg arguments. Each i32 argument is bitcast to a four-element i8 vector, the three vectors are passed to @llvm.fshr.v4i8, and the result is bitcast back to i32 and returned. The per-target assertions that follow are autogenerated by utils/update_llc_test_checks.py; regenerate them instead of editing them by hand. 1232define i32 @v_fshr_v4i8(i32 %lhs.arg, i32 %rhs.arg, i32
%amt.arg) { 1233; GFX6-LABEL: v_fshr_v4i8: 1234; GFX6: ; %bb.0: 1235; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1236; GFX6-NEXT: v_lshrrev_b32_e32 v7, 8, v2 1237; GFX6-NEXT: v_lshrrev_b32_e32 v8, 16, v2 1238; GFX6-NEXT: v_lshrrev_b32_e32 v9, 24, v2 1239; GFX6-NEXT: v_and_b32_e32 v10, 7, v2 1240; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2 1241; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v0 1242; GFX6-NEXT: v_lshrrev_b32_e32 v4, 16, v0 1243; GFX6-NEXT: v_lshrrev_b32_e32 v5, 24, v0 1244; GFX6-NEXT: v_and_b32_e32 v2, 7, v2 1245; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1246; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0 1247; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v1 1248; GFX6-NEXT: v_lshrrev_b32_e32 v2, v10, v2 1249; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 1250; GFX6-NEXT: v_and_b32_e32 v2, 7, v7 1251; GFX6-NEXT: v_xor_b32_e32 v7, -1, v7 1252; GFX6-NEXT: v_and_b32_e32 v7, 7, v7 1253; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3 1254; GFX6-NEXT: v_lshlrev_b32_e32 v3, v7, v3 1255; GFX6-NEXT: v_bfe_u32 v7, v1, 8, 8 1256; GFX6-NEXT: v_lshrrev_b32_e32 v2, v2, v7 1257; GFX6-NEXT: v_xor_b32_e32 v7, -1, v8 1258; GFX6-NEXT: v_lshrrev_b32_e32 v6, 24, v1 1259; GFX6-NEXT: v_or_b32_e32 v2, v3, v2 1260; GFX6-NEXT: v_and_b32_e32 v3, 7, v8 1261; GFX6-NEXT: v_and_b32_e32 v7, 7, v7 1262; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v4 1263; GFX6-NEXT: v_bfe_u32 v1, v1, 16, 8 1264; GFX6-NEXT: v_lshlrev_b32_e32 v4, v7, v4 1265; GFX6-NEXT: v_lshrrev_b32_e32 v1, v3, v1 1266; GFX6-NEXT: v_or_b32_e32 v1, v4, v1 1267; GFX6-NEXT: v_xor_b32_e32 v4, -1, v9 1268; GFX6-NEXT: v_and_b32_e32 v3, 7, v9 1269; GFX6-NEXT: v_and_b32_e32 v4, 7, v4 1270; GFX6-NEXT: v_lshlrev_b32_e32 v5, 1, v5 1271; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v2 1272; GFX6-NEXT: v_lshlrev_b32_e32 v4, v4, v5 1273; GFX6-NEXT: v_lshrrev_b32_e32 v3, v3, v6 1274; GFX6-NEXT: v_and_b32_e32 v0, 0xff, v0 1275; GFX6-NEXT: v_lshlrev_b32_e32 v2, 8, v2 1276; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v1 1277; GFX6-NEXT: v_or_b32_e32 v3, v4, v3 1278; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 1279; 
GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 1280; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 1281; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v3 1282; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1 1283; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 1284; GFX6-NEXT: s_setpc_b64 s[30:31] 1285; 1286; GFX8-LABEL: v_fshr_v4i8: 1287; GFX8: ; %bb.0: 1288; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1289; GFX8-NEXT: v_lshrrev_b32_e32 v5, 8, v2 1290; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v2 1291; GFX8-NEXT: v_lshrrev_b32_e32 v7, 24, v2 1292; GFX8-NEXT: v_and_b32_e32 v8, 7, v2 1293; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2 1294; GFX8-NEXT: v_and_b32_e32 v2, 7, v2 1295; GFX8-NEXT: v_lshlrev_b16_e32 v9, 1, v0 1296; GFX8-NEXT: v_lshlrev_b16_e32 v2, v2, v9 1297; GFX8-NEXT: v_lshrrev_b16_sdwa v8, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1298; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 1299; GFX8-NEXT: v_or_b32_e32 v2, v2, v8 1300; GFX8-NEXT: v_and_b32_e32 v8, 7, v5 1301; GFX8-NEXT: v_xor_b32_e32 v5, -1, v5 1302; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v1 1303; GFX8-NEXT: v_and_b32_e32 v5, 7, v5 1304; GFX8-NEXT: v_lshlrev_b16_e32 v3, 1, v3 1305; GFX8-NEXT: v_lshlrev_b16_e32 v3, v5, v3 1306; GFX8-NEXT: v_lshrrev_b16_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1307; GFX8-NEXT: v_or_b32_e32 v3, v3, v4 1308; GFX8-NEXT: v_and_b32_e32 v4, 7, v6 1309; GFX8-NEXT: v_xor_b32_e32 v5, -1, v6 1310; GFX8-NEXT: v_mov_b32_e32 v6, 1 1311; GFX8-NEXT: v_mov_b32_e32 v9, 0xff 1312; GFX8-NEXT: v_and_b32_e32 v5, 7, v5 1313; GFX8-NEXT: v_lshlrev_b16_sdwa v8, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 1314; GFX8-NEXT: v_lshlrev_b16_e32 v5, v5, v8 1315; GFX8-NEXT: v_and_b32_sdwa v8, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1316; GFX8-NEXT: v_lshrrev_b16_e32 v4, v4, v8 1317; GFX8-NEXT: v_or_b32_e32 v4, v5, v4 1318; GFX8-NEXT: v_and_b32_e32 v5, 7, v7 1319; GFX8-NEXT: v_xor_b32_e32 v7, -1, v7 1320; GFX8-NEXT: 
v_and_b32_e32 v7, 7, v7 1321; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 1322; GFX8-NEXT: v_lshlrev_b16_e32 v0, v7, v0 1323; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 1324; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 1325; GFX8-NEXT: v_mov_b32_e32 v1, 8 1326; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1327; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1328; GFX8-NEXT: v_and_b32_e32 v2, 0xff, v4 1329; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2 1330; GFX8-NEXT: v_and_b32_e32 v0, 0xff, v0 1331; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 1332; GFX8-NEXT: v_lshlrev_b32_e32 v0, 24, v0 1333; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 1334; GFX8-NEXT: s_setpc_b64 s[30:31] 1335; 1336; GFX9-LABEL: v_fshr_v4i8: 1337; GFX9: ; %bb.0: 1338; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1339; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v2 1340; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v2 1341; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v2 1342; GFX9-NEXT: v_and_b32_e32 v8, 7, v2 1343; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2 1344; GFX9-NEXT: v_and_b32_e32 v2, 7, v2 1345; GFX9-NEXT: v_lshlrev_b16_e32 v9, 1, v0 1346; GFX9-NEXT: v_lshlrev_b16_e32 v2, v2, v9 1347; GFX9-NEXT: v_lshrrev_b16_sdwa v8, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1348; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v0 1349; GFX9-NEXT: v_or_b32_e32 v2, v2, v8 1350; GFX9-NEXT: v_and_b32_e32 v8, 7, v5 1351; GFX9-NEXT: v_xor_b32_e32 v5, -1, v5 1352; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1 1353; GFX9-NEXT: v_and_b32_e32 v5, 7, v5 1354; GFX9-NEXT: v_lshlrev_b16_e32 v3, 1, v3 1355; GFX9-NEXT: v_lshlrev_b16_e32 v3, v5, v3 1356; GFX9-NEXT: v_lshrrev_b16_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1357; GFX9-NEXT: v_or_b32_e32 v3, v3, v4 1358; GFX9-NEXT: 
v_and_b32_e32 v4, 7, v6 1359; GFX9-NEXT: v_xor_b32_e32 v5, -1, v6 1360; GFX9-NEXT: v_mov_b32_e32 v6, 1 1361; GFX9-NEXT: v_mov_b32_e32 v9, 0xff 1362; GFX9-NEXT: v_and_b32_e32 v5, 7, v5 1363; GFX9-NEXT: v_lshlrev_b16_sdwa v8, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 1364; GFX9-NEXT: v_lshlrev_b16_e32 v5, v5, v8 1365; GFX9-NEXT: v_and_b32_sdwa v8, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1366; GFX9-NEXT: v_lshrrev_b16_e32 v4, v4, v8 1367; GFX9-NEXT: v_or_b32_e32 v4, v5, v4 1368; GFX9-NEXT: v_and_b32_e32 v5, 7, v7 1369; GFX9-NEXT: v_xor_b32_e32 v7, -1, v7 1370; GFX9-NEXT: v_and_b32_e32 v7, 7, v7 1371; GFX9-NEXT: v_lshlrev_b16_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 1372; GFX9-NEXT: v_lshlrev_b16_e32 v0, v7, v0 1373; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 1374; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 1375; GFX9-NEXT: v_mov_b32_e32 v1, 8 1376; GFX9-NEXT: s_movk_i32 s4, 0xff 1377; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1378; GFX9-NEXT: v_and_or_b32 v1, v2, s4, v1 1379; GFX9-NEXT: v_and_b32_e32 v2, 0xff, v4 1380; GFX9-NEXT: v_and_b32_e32 v0, 0xff, v0 1381; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 1382; GFX9-NEXT: v_lshlrev_b32_e32 v0, 24, v0 1383; GFX9-NEXT: v_or3_b32 v0, v1, v2, v0 1384; GFX9-NEXT: s_setpc_b64 s[30:31] 1385; 1386; GFX10-LABEL: v_fshr_v4i8: 1387; GFX10: ; %bb.0: 1388; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1389; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1390; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v2 1391; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v0 1392; GFX10-NEXT: v_xor_b32_e32 v8, -1, v2 1393; GFX10-NEXT: v_lshrrev_b32_e32 v11, 16, v2 1394; GFX10-NEXT: v_lshrrev_b32_e32 v12, 24, v2 1395; GFX10-NEXT: v_xor_b32_e32 v10, -1, v5 1396; GFX10-NEXT: v_lshlrev_b16 v3, 1, v3 1397; GFX10-NEXT: v_lshrrev_b32_e32 v4, 16, v0 
1398; GFX10-NEXT: v_lshrrev_b32_e32 v6, 24, v0 1399; GFX10-NEXT: v_lshrrev_b32_e32 v7, 8, v1 1400; GFX10-NEXT: v_and_b32_e32 v10, 7, v10 1401; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 1402; GFX10-NEXT: v_and_b32_e32 v8, 7, v8 1403; GFX10-NEXT: v_mov_b32_e32 v13, 0xff 1404; GFX10-NEXT: v_xor_b32_e32 v14, -1, v12 1405; GFX10-NEXT: v_lshlrev_b16 v3, v10, v3 1406; GFX10-NEXT: v_xor_b32_e32 v10, -1, v11 1407; GFX10-NEXT: v_lshrrev_b32_e32 v9, 24, v1 1408; GFX10-NEXT: v_lshlrev_b16 v0, v8, v0 1409; GFX10-NEXT: v_and_b32_e32 v8, 0xff, v1 1410; GFX10-NEXT: v_and_b32_e32 v5, 7, v5 1411; GFX10-NEXT: v_and_b32_e32 v7, 0xff, v7 1412; GFX10-NEXT: v_and_b32_e32 v11, 7, v11 1413; GFX10-NEXT: v_and_b32_e32 v10, 7, v10 1414; GFX10-NEXT: v_lshlrev_b16 v4, 1, v4 1415; GFX10-NEXT: v_and_b32_sdwa v1, v1, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1416; GFX10-NEXT: v_and_b32_e32 v13, 7, v14 1417; GFX10-NEXT: v_lshlrev_b16 v6, 1, v6 1418; GFX10-NEXT: v_and_b32_e32 v12, 7, v12 1419; GFX10-NEXT: v_and_b32_e32 v2, 7, v2 1420; GFX10-NEXT: v_lshrrev_b16 v5, v5, v7 1421; GFX10-NEXT: v_lshlrev_b16 v4, v10, v4 1422; GFX10-NEXT: v_lshrrev_b16 v1, v11, v1 1423; GFX10-NEXT: v_lshlrev_b16 v6, v13, v6 1424; GFX10-NEXT: v_lshrrev_b16 v7, v12, v9 1425; GFX10-NEXT: v_lshrrev_b16 v2, v2, v8 1426; GFX10-NEXT: v_or_b32_e32 v3, v3, v5 1427; GFX10-NEXT: v_mov_b32_e32 v5, 8 1428; GFX10-NEXT: v_or_b32_e32 v1, v4, v1 1429; GFX10-NEXT: v_or_b32_e32 v4, v6, v7 1430; GFX10-NEXT: v_or_b32_e32 v0, v0, v2 1431; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1432; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 1433; GFX10-NEXT: v_and_b32_e32 v3, 0xff, v4 1434; GFX10-NEXT: v_and_or_b32 v0, v0, 0xff, v2 1435; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v1 1436; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v3 1437; GFX10-NEXT: v_or3_b32 v0, v0, v1, v2 1438; GFX10-NEXT: s_setpc_b64 s[30:31] 1439; 1440; GFX11-LABEL: v_fshr_v4i8: 1441; GFX11: ; %bb.0: 1442; 
GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1443; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1444; GFX11-NEXT: v_lshrrev_b32_e32 v6, 8, v1 1445; GFX11-NEXT: v_lshrrev_b32_e32 v7, 8, v2 1446; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v0 1447; GFX11-NEXT: v_lshrrev_b32_e32 v11, 16, v2 1448; GFX11-NEXT: v_lshrrev_b32_e32 v13, 24, v2 1449; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6 1450; GFX11-NEXT: v_xor_b32_e32 v12, -1, v7 1451; GFX11-NEXT: v_and_b32_e32 v7, 7, v7 1452; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v0 1453; GFX11-NEXT: v_lshrrev_b32_e32 v5, 24, v0 1454; GFX11-NEXT: v_lshrrev_b32_e32 v8, 16, v1 1455; GFX11-NEXT: v_and_b32_e32 v12, 7, v12 1456; GFX11-NEXT: v_lshlrev_b16 v3, 1, v3 1457; GFX11-NEXT: v_xor_b32_e32 v14, -1, v11 1458; GFX11-NEXT: v_lshrrev_b16 v6, v7, v6 1459; GFX11-NEXT: v_xor_b32_e32 v7, -1, v13 1460; GFX11-NEXT: v_lshrrev_b32_e32 v9, 24, v1 1461; GFX11-NEXT: v_xor_b32_e32 v10, -1, v2 1462; GFX11-NEXT: v_lshlrev_b16 v3, v12, v3 1463; GFX11-NEXT: v_and_b32_e32 v11, 7, v11 1464; GFX11-NEXT: v_and_b32_e32 v12, 7, v14 1465; GFX11-NEXT: v_lshlrev_b16 v4, 1, v4 1466; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v8 1467; GFX11-NEXT: v_and_b32_e32 v7, 7, v7 1468; GFX11-NEXT: v_lshlrev_b16 v5, 1, v5 1469; GFX11-NEXT: v_and_b32_e32 v13, 7, v13 1470; GFX11-NEXT: v_and_b32_e32 v2, 7, v2 1471; GFX11-NEXT: v_and_b32_e32 v10, 7, v10 1472; GFX11-NEXT: v_lshlrev_b16 v0, 1, v0 1473; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 1474; GFX11-NEXT: v_or_b32_e32 v3, v3, v6 1475; GFX11-NEXT: v_lshlrev_b16 v4, v12, v4 1476; GFX11-NEXT: v_lshrrev_b16 v6, v11, v8 1477; GFX11-NEXT: v_lshlrev_b16 v5, v7, v5 1478; GFX11-NEXT: v_lshrrev_b16 v7, v13, v9 1479; GFX11-NEXT: v_lshlrev_b16 v0, v10, v0 1480; GFX11-NEXT: v_lshrrev_b16 v1, v2, v1 1481; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v3 1482; GFX11-NEXT: v_or_b32_e32 v3, v4, v6 1483; GFX11-NEXT: v_or_b32_e32 v4, v5, v7 1484; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 1485; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 
1486; GFX11-NEXT: v_lshlrev_b32_e32 v1, 8, v2 1487; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 1488; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v3 1489; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4 1490; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 1491; GFX11-NEXT: v_and_or_b32 v0, v0, 0xff, v1 1492; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v2 1493; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) 1494; GFX11-NEXT: v_lshlrev_b32_e32 v2, 24, v3 1495; GFX11-NEXT: v_or3_b32 v0, v0, v1, v2 1496; GFX11-NEXT: s_setpc_b64 s[30:31] 1497 %lhs = bitcast i32 %lhs.arg to <4 x i8> 1498 %rhs = bitcast i32 %rhs.arg to <4 x i8> 1499 %amt = bitcast i32 %amt.arg to <4 x i8> 1500 %result = call <4 x i8> @llvm.fshr.v4i8(<4 x i8> %lhs, <4 x i8> %rhs, <4 x i8> %amt) 1501 %cast.result = bitcast <4 x i8> %result to i32 1502 ret i32 %cast.result 1503} 1504 1505define amdgpu_ps i24 @s_fshr_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt) { 1506; GFX6-LABEL: s_fshr_i24: 1507; GFX6: ; %bb.0: 1508; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 1509; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 1510; GFX6-NEXT: v_mov_b32_e32 v1, 0xffffffe8 1511; GFX6-NEXT: s_and_b32 s2, s2, 0xffffff 1512; GFX6-NEXT: s_lshl_b32 s0, s0, 1 1513; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1514; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 1515; GFX6-NEXT: s_and_b32 s1, s1, 0xffffff 1516; GFX6-NEXT: v_mul_lo_u32 v1, v1, v0 1517; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 1518; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 1519; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0 1520; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24 1521; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 1522; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 24, v0 1523; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1524; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1525; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 24, v0 1526; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1527; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1528; 
GFX6-NEXT: v_sub_i32_e32 v1, vcc, 23, v0 1529; GFX6-NEXT: v_and_b32_e32 v0, 0xffffff, v0 1530; GFX6-NEXT: v_and_b32_e32 v1, 0xffffff, v1 1531; GFX6-NEXT: v_lshl_b32_e32 v1, s0, v1 1532; GFX6-NEXT: v_lshr_b32_e32 v0, s1, v0 1533; GFX6-NEXT: v_or_b32_e32 v0, v1, v0 1534; GFX6-NEXT: v_readfirstlane_b32 s0, v0 1535; GFX6-NEXT: ; return to shader part epilog 1536; 1537; GFX8-LABEL: s_fshr_i24: 1538; GFX8: ; %bb.0: 1539; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 1540; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 1541; GFX8-NEXT: v_mov_b32_e32 v1, 0xffffffe8 1542; GFX8-NEXT: s_and_b32 s2, s2, 0xffffff 1543; GFX8-NEXT: s_lshl_b32 s0, s0, 1 1544; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1545; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 1546; GFX8-NEXT: s_and_b32 s1, s1, 0xffffff 1547; GFX8-NEXT: v_mul_lo_u32 v1, v1, v0 1548; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1 1549; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1 1550; GFX8-NEXT: v_mul_hi_u32 v0, s2, v0 1551; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24 1552; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s2, v0 1553; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 24, v0 1554; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1555; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1556; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 24, v0 1557; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1558; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1559; GFX8-NEXT: v_sub_u32_e32 v1, vcc, 23, v0 1560; GFX8-NEXT: v_and_b32_e32 v0, 0xffffff, v0 1561; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1 1562; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s0 1563; GFX8-NEXT: v_lshrrev_b32_e64 v0, v0, s1 1564; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 1565; GFX8-NEXT: v_readfirstlane_b32 s0, v0 1566; GFX8-NEXT: ; return to shader part epilog 1567; 1568; GFX9-LABEL: s_fshr_i24: 1569; GFX9: ; %bb.0: 1570; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 1571; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 1572; GFX9-NEXT: v_mov_b32_e32 v1, 0xffffffe8 1573; GFX9-NEXT: s_and_b32 s2, s2, 0xffffff 1574; GFX9-NEXT: s_and_b32 s1, s1, 0xffffff 1575; GFX9-NEXT: v_mul_f32_e32 
v0, 0x4f7ffffe, v0 1576; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 1577; GFX9-NEXT: s_lshl_b32 s0, s0, 1 1578; GFX9-NEXT: v_mul_lo_u32 v1, v1, v0 1579; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 1580; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 1581; GFX9-NEXT: v_mul_hi_u32 v0, s2, v0 1582; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24 1583; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0 1584; GFX9-NEXT: v_subrev_u32_e32 v1, 24, v0 1585; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1586; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1587; GFX9-NEXT: v_subrev_u32_e32 v1, 24, v0 1588; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1589; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1590; GFX9-NEXT: v_sub_u32_e32 v1, 23, v0 1591; GFX9-NEXT: v_and_b32_e32 v0, 0xffffff, v0 1592; GFX9-NEXT: v_and_b32_e32 v1, 0xffffff, v1 1593; GFX9-NEXT: v_lshrrev_b32_e64 v0, v0, s1 1594; GFX9-NEXT: v_lshl_or_b32 v0, s0, v1, v0 1595; GFX9-NEXT: v_readfirstlane_b32 s0, v0 1596; GFX9-NEXT: ; return to shader part epilog 1597; 1598; GFX10-LABEL: s_fshr_i24: 1599; GFX10: ; %bb.0: 1600; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 1601; GFX10-NEXT: s_and_b32 s2, s2, 0xffffff 1602; GFX10-NEXT: s_and_b32 s1, s1, 0xffffff 1603; GFX10-NEXT: s_lshl_b32 s0, s0, 1 1604; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 1605; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1606; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 1607; GFX10-NEXT: v_mul_lo_u32 v1, 0xffffffe8, v0 1608; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1 1609; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1 1610; GFX10-NEXT: v_mul_hi_u32 v0, s2, v0 1611; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24 1612; GFX10-NEXT: v_sub_nc_u32_e32 v0, s2, v0 1613; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 24, v0 1614; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 1615; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 1616; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 24, v0 1617; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 1618; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 1619; GFX10-NEXT: v_sub_nc_u32_e32 v1, 23, v0 1620; GFX10-NEXT: v_and_b32_e32 v0, 0xffffff, v0 
1621; GFX10-NEXT: v_and_b32_e32 v1, 0xffffff, v1 1622; GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, s1 1623; GFX10-NEXT: v_lshl_or_b32 v0, s0, v1, v0 1624; GFX10-NEXT: v_readfirstlane_b32 s0, v0 1625; GFX10-NEXT: ; return to shader part epilog 1626; 1627; GFX11-LABEL: s_fshr_i24: 1628; GFX11: ; %bb.0: 1629; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 1630; GFX11-NEXT: s_and_b32 s2, s2, 0xffffff 1631; GFX11-NEXT: s_and_b32 s1, s1, 0xffffff 1632; GFX11-NEXT: s_lshl_b32 s0, s0, 1 1633; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) 1634; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0 1635; GFX11-NEXT: s_waitcnt_depctr 0xfff 1636; GFX11-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1637; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0 1638; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1639; GFX11-NEXT: v_mul_lo_u32 v1, 0xffffffe8, v0 1640; GFX11-NEXT: v_mul_hi_u32 v1, v0, v1 1641; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1642; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v1 1643; GFX11-NEXT: v_mul_hi_u32 v0, s2, v0 1644; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1645; GFX11-NEXT: v_mul_lo_u32 v0, v0, 24 1646; GFX11-NEXT: v_sub_nc_u32_e32 v0, s2, v0 1647; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1648; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 24, v0 1649; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 1650; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 1651; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1652; GFX11-NEXT: v_subrev_nc_u32_e32 v1, 24, v0 1653; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 1654; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 1655; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1656; GFX11-NEXT: v_sub_nc_u32_e32 v1, 23, v0 1657; GFX11-NEXT: v_and_b32_e32 v0, 0xffffff, v0 1658; GFX11-NEXT: v_and_b32_e32 v1, 0xffffff, 
; -----------------------------------------------------------------------------
; v_fshr_i24 -- funnel shift right on i24 with ordinary (non-inreg) arguments.
;
; The IR body is a single call to llvm.fshr.i24(%lhs, %rhs, %amt) whose result
; is returned. The surrounding assertion lines were autogenerated by
; utils/update_llc_test_checks.py (see the first line of the file); regenerate
; them with that script instead of editing them by hand.
;
; What the expected code does on every checked target
;   * masks the amount and %rhs with 0xffffff and shifts %lhs left by 1;
;   * reduces the amount modulo 24 without a divide instruction -- float
;     reciprocal of 24, scale by 0x4f7ffffe, convert to integer, refine with
;     mul_lo by 0xffffffe8 (-24 as a 32-bit value) / mul_hi / add, then
;     mul_hi / mul_lo 24 / sub, followed by two compare-and-select
;     corrections that each subtract 24 when the value is still >= 24;
;   * shifts the pre-shifted %lhs left by (23 - remainder) and %rhs right by
;     the remainder, then ORs them (a fused v_lshl_or_b32 on the GFX9, GFX10
;     and GFX11 runs).
; The v0/v1/v2 <-> %lhs/%rhs/%amt mapping is read off the checked instructions,
; not from an ABI document -- confirm before relying on it.
;
; NOTE(review) -- this region looks whitespace-collapsed (several assertion
; lines and stray line numbers share one physical line, and it also carries the
; tail of s_fshr_i24 and the start of s_fshr_v2i24). The text below is left
; untouched.
; -----------------------------------------------------------------------------
v1 1659; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 1660; GFX11-NEXT: v_lshrrev_b32_e64 v0, v0, s1 1661; GFX11-NEXT: v_lshl_or_b32 v0, s0, v1, v0 1662; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1663; GFX11-NEXT: v_readfirstlane_b32 s0, v0 1664; GFX11-NEXT: ; return to shader part epilog 1665 %result = call i24 @llvm.fshr.i24(i24 %lhs, i24 %rhs, i24 %amt) 1666 ret i24 %result 1667} 1668 1669define i24 @v_fshr_i24(i24 %lhs, i24 %rhs, i24 %amt) { 1670; GFX6-LABEL: v_fshr_i24: 1671; GFX6: ; %bb.0: 1672; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1673; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v3, 24 1674; GFX6-NEXT: v_rcp_iflag_f32_e32 v3, v3 1675; GFX6-NEXT: v_mov_b32_e32 v4, 0xffffffe8 1676; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1677; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1678; GFX6-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 1679; GFX6-NEXT: v_cvt_u32_f32_e32 v3, v3 1680; GFX6-NEXT: v_and_b32_e32 v1, 0xffffff, v1 1681; GFX6-NEXT: v_mul_lo_u32 v4, v4, v3 1682; GFX6-NEXT: v_mul_hi_u32 v4, v3, v4 1683; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v4 1684; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3 1685; GFX6-NEXT: v_mul_lo_u32 v3, v3, 24 1686; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3 1687; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v2 1688; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 1689; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1690; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v2 1691; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 1692; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1693; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v2 1694; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1695; GFX6-NEXT: v_and_b32_e32 v3, 0xffffff, v3 1696; GFX6-NEXT: v_lshlrev_b32_e32 v0, v3, v0 1697; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1 1698; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 1699; GFX6-NEXT: s_setpc_b64 s[30:31] 1700; 1701; GFX8-LABEL: v_fshr_i24: 1702; GFX8: ; %bb.0: 1703; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1704; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v3, 24 1705; 
GFX8-NEXT: v_rcp_iflag_f32_e32 v3, v3 1706; GFX8-NEXT: v_mov_b32_e32 v4, 0xffffffe8 1707; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1708; GFX8-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1709; GFX8-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 1710; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v3 1711; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1 1712; GFX8-NEXT: v_mul_lo_u32 v4, v4, v3 1713; GFX8-NEXT: v_mul_hi_u32 v4, v3, v4 1714; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v4 1715; GFX8-NEXT: v_mul_hi_u32 v3, v2, v3 1716; GFX8-NEXT: v_mul_lo_u32 v3, v3, 24 1717; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v3 1718; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v2 1719; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 1720; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1721; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v2 1722; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 1723; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1724; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v2 1725; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1726; GFX8-NEXT: v_and_b32_e32 v3, 0xffffff, v3 1727; GFX8-NEXT: v_lshlrev_b32_e32 v0, v3, v0 1728; GFX8-NEXT: v_lshrrev_b32_e32 v1, v2, v1 1729; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 1730; GFX8-NEXT: s_setpc_b64 s[30:31] 1731; 1732; GFX9-LABEL: v_fshr_i24: 1733; GFX9: ; %bb.0: 1734; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1735; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v3, 24 1736; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v3 1737; GFX9-NEXT: v_mov_b32_e32 v4, 0xffffffe8 1738; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1739; GFX9-NEXT: v_and_b32_e32 v1, 0xffffff, v1 1740; GFX9-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 1741; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v3 1742; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1743; GFX9-NEXT: v_mul_lo_u32 v4, v4, v3 1744; GFX9-NEXT: v_mul_hi_u32 v4, v3, v4 1745; GFX9-NEXT: v_add_u32_e32 v3, v3, v4 1746; GFX9-NEXT: v_mul_hi_u32 v3, v2, v3 1747; GFX9-NEXT: v_mul_lo_u32 v3, v3, 24 1748; GFX9-NEXT: v_sub_u32_e32 v2, v2, v3 1749; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v2 1750; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 
24, v2 1751; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1752; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v2 1753; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 1754; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1755; GFX9-NEXT: v_sub_u32_e32 v3, 23, v2 1756; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1757; GFX9-NEXT: v_and_b32_e32 v3, 0xffffff, v3 1758; GFX9-NEXT: v_lshrrev_b32_e32 v1, v2, v1 1759; GFX9-NEXT: v_lshl_or_b32 v0, v0, v3, v1 1760; GFX9-NEXT: s_setpc_b64 s[30:31] 1761; 1762; GFX10-LABEL: v_fshr_i24: 1763; GFX10: ; %bb.0: 1764; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1765; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1766; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v3, 24 1767; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1768; GFX10-NEXT: v_and_b32_e32 v1, 0xffffff, v1 1769; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1770; GFX10-NEXT: v_rcp_iflag_f32_e32 v3, v3 1771; GFX10-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 1772; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v3 1773; GFX10-NEXT: v_mul_lo_u32 v4, 0xffffffe8, v3 1774; GFX10-NEXT: v_mul_hi_u32 v4, v3, v4 1775; GFX10-NEXT: v_add_nc_u32_e32 v3, v3, v4 1776; GFX10-NEXT: v_mul_hi_u32 v3, v2, v3 1777; GFX10-NEXT: v_mul_lo_u32 v3, v3, 24 1778; GFX10-NEXT: v_sub_nc_u32_e32 v2, v2, v3 1779; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v2 1780; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2 1781; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo 1782; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v2 1783; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2 1784; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo 1785; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v2 1786; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1787; GFX10-NEXT: v_and_b32_e32 v3, 0xffffff, v3 1788; GFX10-NEXT: v_lshrrev_b32_e32 v1, v2, v1 1789; GFX10-NEXT: v_lshl_or_b32 v0, v0, v3, v1 1790; GFX10-NEXT: s_setpc_b64 s[30:31] 1791; 1792; GFX11-LABEL: v_fshr_i24: 1793; GFX11: ; %bb.0: 1794; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1795; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1796; GFX11-NEXT: 
v_cvt_f32_ubyte0_e32 v3, 24 1797; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1798; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1799; GFX11-NEXT: v_and_b32_e32 v1, 0xffffff, v1 1800; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_1) 1801; GFX11-NEXT: v_rcp_iflag_f32_e32 v3, v3 1802; GFX11-NEXT: s_waitcnt_depctr 0xfff 1803; GFX11-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 1804; GFX11-NEXT: v_cvt_u32_f32_e32 v3, v3 1805; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1806; GFX11-NEXT: v_mul_lo_u32 v4, 0xffffffe8, v3 1807; GFX11-NEXT: v_mul_hi_u32 v4, v3, v4 1808; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1809; GFX11-NEXT: v_add_nc_u32_e32 v3, v3, v4 1810; GFX11-NEXT: v_mul_hi_u32 v3, v2, v3 1811; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1812; GFX11-NEXT: v_mul_lo_u32 v3, v3, 24 1813; GFX11-NEXT: v_sub_nc_u32_e32 v2, v2, v3 1814; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1815; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v2 1816; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2 1817; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo 1818; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1819; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v2 1820; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2 1821; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo 1822; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1823; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v2 1824; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1825; GFX11-NEXT: v_and_b32_e32 v3, 0xffffff, v3 1826; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 1827; GFX11-NEXT: v_lshrrev_b32_e32 v1, v2, v1 1828; GFX11-NEXT: v_lshl_or_b32 v0, v0, v3, v1 1829; GFX11-NEXT: s_setpc_b64 s[30:31] 1830 %result = call i24 @llvm.fshr.i24(i24 %lhs, i24 %rhs, i24 %amt) 1831 
ret i24 %result 1832} 1833 1834define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 inreg %amt.arg) { 1835; GFX6-LABEL: s_fshr_v2i24: 1836; GFX6: ; %bb.0: 1837; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 1838; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 1839; GFX6-NEXT: s_lshr_b32 s6, s0, 16 1840; GFX6-NEXT: s_lshr_b32 s7, s0, 24 1841; GFX6-NEXT: s_lshr_b32 s8, s1, 8 1842; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1843; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 1844; GFX6-NEXT: s_and_b32 s9, s0, 0xff 1845; GFX6-NEXT: s_bfe_u32 s0, s0, 0x80008 1846; GFX6-NEXT: s_and_b32 s1, s1, 0xff 1847; GFX6-NEXT: s_lshl_b32 s0, s0, 8 1848; GFX6-NEXT: s_lshl_b32 s1, s1, 8 1849; GFX6-NEXT: v_mov_b32_e32 v1, 0xffffffe8 1850; GFX6-NEXT: s_or_b32 s0, s9, s0 1851; GFX6-NEXT: s_or_b32 s1, s7, s1 1852; GFX6-NEXT: s_and_b32 s7, s8, 0xff 1853; GFX6-NEXT: s_lshr_b32 s8, s2, 16 1854; GFX6-NEXT: s_lshr_b32 s9, s2, 24 1855; GFX6-NEXT: s_and_b32 s11, s2, 0xff 1856; GFX6-NEXT: s_bfe_u32 s2, s2, 0x80008 1857; GFX6-NEXT: v_mul_lo_u32 v2, v1, v0 1858; GFX6-NEXT: s_lshl_b32 s2, s2, 8 1859; GFX6-NEXT: s_and_b32 s8, s8, 0xff 1860; GFX6-NEXT: s_or_b32 s2, s11, s2 1861; GFX6-NEXT: s_bfe_u32 s8, s8, 0x100000 1862; GFX6-NEXT: s_lshr_b32 s10, s3, 8 1863; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000 1864; GFX6-NEXT: s_lshl_b32 s8, s8, 16 1865; GFX6-NEXT: s_and_b32 s3, s3, 0xff 1866; GFX6-NEXT: s_or_b32 s2, s2, s8 1867; GFX6-NEXT: s_lshl_b32 s3, s3, 8 1868; GFX6-NEXT: s_and_b32 s8, s10, 0xff 1869; GFX6-NEXT: v_mul_hi_u32 v2, v0, v2 1870; GFX6-NEXT: s_or_b32 s3, s9, s3 1871; GFX6-NEXT: s_bfe_u32 s8, s8, 0x100000 1872; GFX6-NEXT: s_bfe_u32 s3, s3, 0x100000 1873; GFX6-NEXT: s_lshl_b32 s8, s8, 16 1874; GFX6-NEXT: s_or_b32 s3, s3, s8 1875; GFX6-NEXT: s_lshr_b32 s8, s4, 16 1876; GFX6-NEXT: s_lshr_b32 s9, s4, 24 1877; GFX6-NEXT: s_and_b32 s11, s4, 0xff 1878; GFX6-NEXT: s_bfe_u32 s4, s4, 0x80008 1879; GFX6-NEXT: s_lshl_b32 s4, s4, 8 1880; GFX6-NEXT: s_and_b32 s8, s8, 0xff 1881; GFX6-NEXT: v_add_i32_e32 v0, vcc, 
v0, v2 1882; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v2, 24 1883; GFX6-NEXT: s_or_b32 s4, s11, s4 1884; GFX6-NEXT: s_bfe_u32 s8, s8, 0x100000 1885; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2 1886; GFX6-NEXT: s_bfe_u32 s4, s4, 0x100000 1887; GFX6-NEXT: s_lshl_b32 s8, s8, 16 1888; GFX6-NEXT: s_or_b32 s4, s4, s8 1889; GFX6-NEXT: v_mul_hi_u32 v0, s4, v0 1890; GFX6-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 1891; GFX6-NEXT: v_cvt_u32_f32_e32 v2, v2 1892; GFX6-NEXT: s_lshr_b32 s10, s5, 8 1893; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24 1894; GFX6-NEXT: s_and_b32 s5, s5, 0xff 1895; GFX6-NEXT: v_mul_lo_u32 v1, v1, v2 1896; GFX6-NEXT: s_lshl_b32 s5, s5, 8 1897; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s4, v0 1898; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v0 1899; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1900; GFX6-NEXT: v_mul_hi_u32 v1, v2, v1 1901; GFX6-NEXT: s_and_b32 s8, s10, 0xff 1902; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 1903; GFX6-NEXT: s_or_b32 s5, s9, s5 1904; GFX6-NEXT: s_bfe_u32 s8, s8, 0x100000 1905; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v0 1906; GFX6-NEXT: s_bfe_u32 s5, s5, 0x100000 1907; GFX6-NEXT: s_lshl_b32 s8, s8, 16 1908; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1909; GFX6-NEXT: s_or_b32 s5, s5, s8 1910; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 1911; GFX6-NEXT: v_add_i32_e32 v1, vcc, v2, v1 1912; GFX6-NEXT: v_mul_hi_u32 v1, s5, v1 1913; GFX6-NEXT: s_and_b32 s6, s6, 0xff 1914; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000 1915; GFX6-NEXT: s_bfe_u32 s6, s6, 0x100000 1916; GFX6-NEXT: v_mul_lo_u32 v1, v1, 24 1917; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v0 1918; GFX6-NEXT: s_lshl_b32 s4, s6, 17 1919; GFX6-NEXT: s_lshl_b32 s0, s0, 1 1920; GFX6-NEXT: s_or_b32 s0, s4, s0 1921; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v3 1922; GFX6-NEXT: v_and_b32_e32 v0, 0xffffff, v0 1923; GFX6-NEXT: v_lshl_b32_e32 v2, s0, v2 1924; GFX6-NEXT: v_lshr_b32_e32 v0, s2, v0 1925; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s5, v1 1926; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 1927; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1 1928; 
GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 1929; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 1930; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1 1931; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 1932; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000 1933; GFX6-NEXT: s_bfe_u32 s7, s7, 0x100000 1934; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 1935; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 23, v1 1936; GFX6-NEXT: s_lshl_b32 s0, s7, 17 1937; GFX6-NEXT: s_lshl_b32 s1, s1, 1 1938; GFX6-NEXT: s_or_b32 s0, s0, s1 1939; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1940; GFX6-NEXT: v_and_b32_e32 v1, 0xffffff, v1 1941; GFX6-NEXT: v_lshl_b32_e32 v2, s0, v2 1942; GFX6-NEXT: v_lshr_b32_e32 v1, s3, v1 1943; GFX6-NEXT: v_bfe_u32 v3, v0, 8, 8 1944; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 1945; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v0 1946; GFX6-NEXT: v_lshlrev_b32_e32 v3, 8, v3 1947; GFX6-NEXT: v_bfe_u32 v0, v0, 16, 8 1948; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 1949; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 1950; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 1951; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v1 1952; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v2 1953; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 1954; GFX6-NEXT: v_bfe_u32 v2, v1, 8, 8 1955; GFX6-NEXT: v_bfe_u32 v1, v1, 16, 8 1956; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1 1957; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 1958; GFX6-NEXT: v_readfirstlane_b32 s0, v0 1959; GFX6-NEXT: v_readfirstlane_b32 s1, v1 1960; GFX6-NEXT: ; return to shader part epilog 1961; 1962; GFX8-LABEL: s_fshr_v2i24: 1963; GFX8: ; %bb.0: 1964; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 1965; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 1966; GFX8-NEXT: s_lshr_b32 s9, s1, 8 1967; GFX8-NEXT: s_bfe_u32 s10, 8, 0x100000 1968; GFX8-NEXT: s_and_b32 s1, s1, 0xff 1969; GFX8-NEXT: s_lshr_b32 s6, s0, 8 1970; GFX8-NEXT: s_lshr_b32 s8, s0, 24 1971; GFX8-NEXT: s_lshl_b32 s1, s1, s10 1972; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1973; GFX8-NEXT: s_and_b32 s6, s6, 0xff 1974; GFX8-NEXT: s_or_b32 s1, s8, s1 1975; GFX8-NEXT: s_lshr_b32 s8, s2, 8 1976; 
GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 1977; GFX8-NEXT: s_lshr_b32 s7, s0, 16 1978; GFX8-NEXT: s_and_b32 s0, s0, 0xff 1979; GFX8-NEXT: s_lshl_b32 s6, s6, s10 1980; GFX8-NEXT: s_and_b32 s8, s8, 0xff 1981; GFX8-NEXT: s_or_b32 s0, s0, s6 1982; GFX8-NEXT: s_and_b32 s6, s7, 0xff 1983; GFX8-NEXT: s_and_b32 s7, s9, 0xff 1984; GFX8-NEXT: s_lshr_b32 s9, s2, 16 1985; GFX8-NEXT: s_lshr_b32 s11, s2, 24 1986; GFX8-NEXT: s_and_b32 s2, s2, 0xff 1987; GFX8-NEXT: s_lshl_b32 s8, s8, s10 1988; GFX8-NEXT: s_or_b32 s2, s2, s8 1989; GFX8-NEXT: s_and_b32 s8, s9, 0xff 1990; GFX8-NEXT: v_mov_b32_e32 v1, 0xffffffe8 1991; GFX8-NEXT: s_bfe_u32 s8, s8, 0x100000 1992; GFX8-NEXT: v_mul_lo_u32 v2, v1, v0 1993; GFX8-NEXT: s_lshr_b32 s12, s3, 8 1994; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 1995; GFX8-NEXT: s_lshl_b32 s8, s8, 16 1996; GFX8-NEXT: s_and_b32 s3, s3, 0xff 1997; GFX8-NEXT: s_or_b32 s2, s2, s8 1998; GFX8-NEXT: s_lshl_b32 s3, s3, s10 1999; GFX8-NEXT: s_and_b32 s8, s12, 0xff 2000; GFX8-NEXT: s_or_b32 s3, s11, s3 2001; GFX8-NEXT: s_bfe_u32 s8, s8, 0x100000 2002; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000 2003; GFX8-NEXT: s_lshl_b32 s8, s8, 16 2004; GFX8-NEXT: v_mul_hi_u32 v2, v0, v2 2005; GFX8-NEXT: s_or_b32 s3, s3, s8 2006; GFX8-NEXT: s_lshr_b32 s8, s4, 8 2007; GFX8-NEXT: s_and_b32 s8, s8, 0xff 2008; GFX8-NEXT: s_lshr_b32 s9, s4, 16 2009; GFX8-NEXT: s_lshr_b32 s11, s4, 24 2010; GFX8-NEXT: s_and_b32 s4, s4, 0xff 2011; GFX8-NEXT: s_lshl_b32 s8, s8, s10 2012; GFX8-NEXT: s_or_b32 s4, s4, s8 2013; GFX8-NEXT: s_and_b32 s8, s9, 0xff 2014; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 2015; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v2, 24 2016; GFX8-NEXT: s_bfe_u32 s8, s8, 0x100000 2017; GFX8-NEXT: v_rcp_iflag_f32_e32 v2, v2 2018; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000 2019; GFX8-NEXT: s_lshl_b32 s8, s8, 16 2020; GFX8-NEXT: s_or_b32 s4, s4, s8 2021; GFX8-NEXT: v_mul_hi_u32 v0, s4, v0 2022; GFX8-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 2023; GFX8-NEXT: v_cvt_u32_f32_e32 v2, v2 2024; GFX8-NEXT: s_lshr_b32 s12, s5, 8 2025; 
GFX8-NEXT: v_mul_lo_u32 v0, v0, 24 2026; GFX8-NEXT: s_and_b32 s5, s5, 0xff 2027; GFX8-NEXT: v_mul_lo_u32 v1, v1, v2 2028; GFX8-NEXT: s_lshl_b32 s5, s5, s10 2029; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s4, v0 2030; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v0 2031; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 2032; GFX8-NEXT: v_mul_hi_u32 v1, v2, v1 2033; GFX8-NEXT: s_and_b32 s8, s12, 0xff 2034; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 2035; GFX8-NEXT: s_or_b32 s5, s11, s5 2036; GFX8-NEXT: s_bfe_u32 s8, s8, 0x100000 2037; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v0 2038; GFX8-NEXT: s_bfe_u32 s5, s5, 0x100000 2039; GFX8-NEXT: s_lshl_b32 s8, s8, 16 2040; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 2041; GFX8-NEXT: s_or_b32 s5, s5, s8 2042; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 2043; GFX8-NEXT: v_add_u32_e32 v1, vcc, v2, v1 2044; GFX8-NEXT: v_mul_hi_u32 v1, s5, v1 2045; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000 2046; GFX8-NEXT: s_bfe_u32 s6, s6, 0x100000 2047; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v0 2048; GFX8-NEXT: v_mul_lo_u32 v1, v1, 24 2049; GFX8-NEXT: s_lshl_b32 s4, s6, 17 2050; GFX8-NEXT: s_lshl_b32 s0, s0, 1 2051; GFX8-NEXT: s_or_b32 s0, s4, s0 2052; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v3 2053; GFX8-NEXT: v_and_b32_e32 v0, 0xffffff, v0 2054; GFX8-NEXT: v_lshlrev_b32_e64 v2, v2, s0 2055; GFX8-NEXT: v_lshrrev_b32_e64 v0, v0, s2 2056; GFX8-NEXT: v_sub_u32_e32 v1, vcc, s5, v1 2057; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 2058; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v1 2059; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 2060; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 2061; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v1 2062; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 2063; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 2064; GFX8-NEXT: s_bfe_u32 s7, s7, 0x100000 2065; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 2066; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 23, v1 2067; GFX8-NEXT: s_lshl_b32 s0, s7, 17 2068; GFX8-NEXT: s_lshl_b32 s1, s1, 1 2069; GFX8-NEXT: s_or_b32 s0, s0, s1 2070; GFX8-NEXT: 
v_and_b32_e32 v2, 0xffffff, v2 2071; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1 2072; GFX8-NEXT: v_lshlrev_b32_e64 v2, v2, s0 2073; GFX8-NEXT: v_lshrrev_b32_e64 v1, v1, s3 2074; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 2075; GFX8-NEXT: v_mov_b32_e32 v2, 8 2076; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2077; GFX8-NEXT: v_mov_b32_e32 v4, 16 2078; GFX8-NEXT: v_or_b32_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2079; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2080; GFX8-NEXT: v_or_b32_e32 v0, v3, v0 2081; GFX8-NEXT: v_and_b32_e32 v3, 0xff, v1 2082; GFX8-NEXT: v_lshlrev_b32_e32 v3, 24, v3 2083; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2084; GFX8-NEXT: v_or_b32_e32 v0, v0, v3 2085; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD 2086; GFX8-NEXT: v_readfirstlane_b32 s0, v0 2087; GFX8-NEXT: v_readfirstlane_b32 s1, v1 2088; GFX8-NEXT: ; return to shader part epilog 2089; 2090; GFX9-LABEL: s_fshr_v2i24: 2091; GFX9: ; %bb.0: 2092; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 2093; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 2094; GFX9-NEXT: v_mov_b32_e32 v1, 0xffffffe8 2095; GFX9-NEXT: s_lshr_b32 s11, s1, 8 2096; GFX9-NEXT: s_bfe_u32 s12, 8, 0x100000 2097; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 2098; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 2099; GFX9-NEXT: s_and_b32 s1, s1, 0xff 2100; GFX9-NEXT: s_lshr_b32 s7, s0, 8 2101; GFX9-NEXT: s_lshr_b32 s10, s0, 24 2102; GFX9-NEXT: v_mul_lo_u32 v2, v1, v0 2103; GFX9-NEXT: s_lshl_b32 s1, s1, s12 2104; GFX9-NEXT: s_and_b32 s7, s7, 0xff 2105; GFX9-NEXT: s_or_b32 s1, s10, s1 2106; GFX9-NEXT: v_mul_hi_u32 v2, v0, v2 2107; GFX9-NEXT: s_lshr_b32 s10, s2, 8 2108; GFX9-NEXT: s_lshr_b32 s9, s0, 16 2109; GFX9-NEXT: s_and_b32 s0, s0, 0xff 2110; GFX9-NEXT: s_lshl_b32 s7, s7, s12 
2111; GFX9-NEXT: s_and_b32 s10, s10, 0xff 2112; GFX9-NEXT: s_or_b32 s0, s0, s7 2113; GFX9-NEXT: s_and_b32 s7, s9, 0xff 2114; GFX9-NEXT: s_and_b32 s9, s11, 0xff 2115; GFX9-NEXT: s_lshr_b32 s11, s2, 16 2116; GFX9-NEXT: s_lshr_b32 s13, s2, 24 2117; GFX9-NEXT: s_and_b32 s2, s2, 0xff 2118; GFX9-NEXT: s_lshl_b32 s10, s10, s12 2119; GFX9-NEXT: s_or_b32 s2, s2, s10 2120; GFX9-NEXT: s_and_b32 s10, s11, 0xff 2121; GFX9-NEXT: v_add_u32_e32 v0, v0, v2 2122; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v2, 24 2123; GFX9-NEXT: s_bfe_u32 s10, s10, 0x100000 2124; GFX9-NEXT: v_rcp_iflag_f32_e32 v2, v2 2125; GFX9-NEXT: s_lshr_b32 s14, s3, 8 2126; GFX9-NEXT: s_bfe_u32 s2, s2, 0x100000 2127; GFX9-NEXT: s_lshl_b32 s10, s10, 16 2128; GFX9-NEXT: s_and_b32 s3, s3, 0xff 2129; GFX9-NEXT: s_or_b32 s2, s2, s10 2130; GFX9-NEXT: s_lshl_b32 s3, s3, s12 2131; GFX9-NEXT: s_and_b32 s10, s14, 0xff 2132; GFX9-NEXT: s_or_b32 s3, s13, s3 2133; GFX9-NEXT: s_bfe_u32 s10, s10, 0x100000 2134; GFX9-NEXT: s_bfe_u32 s3, s3, 0x100000 2135; GFX9-NEXT: s_lshl_b32 s10, s10, 16 2136; GFX9-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 2137; GFX9-NEXT: s_or_b32 s3, s3, s10 2138; GFX9-NEXT: s_lshr_b32 s10, s4, 8 2139; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v2 2140; GFX9-NEXT: s_and_b32 s10, s10, 0xff 2141; GFX9-NEXT: s_lshr_b32 s11, s4, 16 2142; GFX9-NEXT: s_lshr_b32 s13, s4, 24 2143; GFX9-NEXT: s_and_b32 s4, s4, 0xff 2144; GFX9-NEXT: s_lshl_b32 s10, s10, s12 2145; GFX9-NEXT: s_or_b32 s4, s4, s10 2146; GFX9-NEXT: s_and_b32 s10, s11, 0xff 2147; GFX9-NEXT: s_bfe_u32 s10, s10, 0x100000 2148; GFX9-NEXT: v_mul_lo_u32 v1, v1, v2 2149; GFX9-NEXT: s_bfe_u32 s4, s4, 0x100000 2150; GFX9-NEXT: s_lshl_b32 s10, s10, 16 2151; GFX9-NEXT: s_or_b32 s4, s4, s10 2152; GFX9-NEXT: v_mul_hi_u32 v0, s4, v0 2153; GFX9-NEXT: s_lshr_b32 s14, s5, 8 2154; GFX9-NEXT: s_and_b32 s5, s5, 0xff 2155; GFX9-NEXT: v_mul_hi_u32 v1, v2, v1 2156; GFX9-NEXT: s_lshl_b32 s5, s5, s12 2157; GFX9-NEXT: s_and_b32 s10, s14, 0xff 2158; GFX9-NEXT: s_or_b32 s5, s13, s5 2159; GFX9-NEXT: 
s_bfe_u32 s10, s10, 0x100000 2160; GFX9-NEXT: s_bfe_u32 s5, s5, 0x100000 2161; GFX9-NEXT: s_lshl_b32 s10, s10, 16 2162; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24 2163; GFX9-NEXT: s_or_b32 s5, s5, s10 2164; GFX9-NEXT: v_add_u32_e32 v1, v2, v1 2165; GFX9-NEXT: v_mul_hi_u32 v1, s5, v1 2166; GFX9-NEXT: v_sub_u32_e32 v0, s4, v0 2167; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v0 2168; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 2169; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 2170; GFX9-NEXT: v_mul_lo_u32 v1, v1, 24 2171; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v0 2172; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 2173; GFX9-NEXT: s_bfe_u32 s0, s0, 0x100000 2174; GFX9-NEXT: s_bfe_u32 s7, s7, 0x100000 2175; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 2176; GFX9-NEXT: v_sub_u32_e32 v3, 23, v0 2177; GFX9-NEXT: s_lshl_b32 s4, s7, 17 2178; GFX9-NEXT: s_lshl_b32 s0, s0, 1 2179; GFX9-NEXT: v_and_b32_e32 v0, 0xffffff, v0 2180; GFX9-NEXT: s_or_b32 s0, s4, s0 2181; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v3 2182; GFX9-NEXT: v_lshrrev_b32_e64 v0, v0, s2 2183; GFX9-NEXT: v_sub_u32_e32 v1, s5, v1 2184; GFX9-NEXT: v_lshl_or_b32 v0, s0, v2, v0 2185; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v1 2186; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 2187; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 2188; GFX9-NEXT: v_subrev_u32_e32 v2, 24, v1 2189; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 2190; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 2191; GFX9-NEXT: s_bfe_u32 s9, s9, 0x100000 2192; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 2193; GFX9-NEXT: v_sub_u32_e32 v2, 23, v1 2194; GFX9-NEXT: s_lshl_b32 s0, s9, 17 2195; GFX9-NEXT: s_lshl_b32 s1, s1, 1 2196; GFX9-NEXT: v_and_b32_e32 v1, 0xffffff, v1 2197; GFX9-NEXT: s_or_b32 s0, s0, s1 2198; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2 2199; GFX9-NEXT: v_lshrrev_b32_e64 v1, v1, s3 2200; GFX9-NEXT: s_mov_b32 s6, 8 2201; GFX9-NEXT: v_lshl_or_b32 v1, s0, v2, v1 2202; GFX9-NEXT: s_mov_b32 s8, 16 2203; GFX9-NEXT: s_movk_i32 s0, 0xff 2204; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s6, v0 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2205; GFX9-NEXT: v_and_b32_e32 v3, 0xff, v1 2206; GFX9-NEXT: v_and_or_b32 v2, v0, s0, v2 2207; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2208; GFX9-NEXT: v_lshlrev_b32_e32 v3, 24, v3 2209; GFX9-NEXT: v_or3_b32 v0, v2, v0, v3 2210; GFX9-NEXT: v_bfe_u32 v2, v1, 8, 8 2211; GFX9-NEXT: v_bfe_u32 v1, v1, 16, 8 2212; GFX9-NEXT: v_lshl_or_b32 v1, v1, 8, v2 2213; GFX9-NEXT: v_readfirstlane_b32 s0, v0 2214; GFX9-NEXT: v_readfirstlane_b32 s1, v1 2215; GFX9-NEXT: ; return to shader part epilog 2216; 2217; GFX10-LABEL: s_fshr_v2i24: 2218; GFX10: ; %bb.0: 2219; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 2220; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v1, 24 2221; GFX10-NEXT: s_lshr_b32 s9, s1, 8 2222; GFX10-NEXT: s_bfe_u32 s10, 8, 0x100000 2223; GFX10-NEXT: s_and_b32 s1, s1, 0xff 2224; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 2225; GFX10-NEXT: v_rcp_iflag_f32_e32 v1, v1 2226; GFX10-NEXT: s_lshr_b32 s6, s0, 8 2227; GFX10-NEXT: s_lshr_b32 s8, s0, 24 2228; GFX10-NEXT: s_lshl_b32 s1, s1, s10 2229; GFX10-NEXT: s_and_b32 s6, s6, 0xff 2230; GFX10-NEXT: s_or_b32 s1, s8, s1 2231; GFX10-NEXT: s_lshr_b32 s8, s4, 8 2232; GFX10-NEXT: s_lshr_b32 s7, s0, 16 2233; GFX10-NEXT: s_and_b32 s0, s0, 0xff 2234; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 2235; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 2236; GFX10-NEXT: s_lshl_b32 s6, s6, s10 2237; GFX10-NEXT: s_and_b32 s8, s8, 0xff 2238; GFX10-NEXT: s_or_b32 s0, s0, s6 2239; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 2240; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1 2241; GFX10-NEXT: s_and_b32 s6, s7, 0xff 2242; GFX10-NEXT: s_and_b32 s7, s9, 0xff 2243; GFX10-NEXT: s_lshr_b32 s9, s4, 16 2244; GFX10-NEXT: v_mul_lo_u32 v2, 0xffffffe8, v0 2245; GFX10-NEXT: v_mul_lo_u32 v3, 0xffffffe8, v1 2246; GFX10-NEXT: s_lshr_b32 s11, s4, 24 2247; GFX10-NEXT: s_and_b32 s4, s4, 0xff 2248; GFX10-NEXT: s_lshl_b32 s8, s8, s10 2249; GFX10-NEXT: s_lshr_b32 s12, s5, 8 2250; 
GFX10-NEXT: s_or_b32 s4, s4, s8 2251; GFX10-NEXT: s_and_b32 s8, s9, 0xff 2252; GFX10-NEXT: v_mul_hi_u32 v2, v0, v2 2253; GFX10-NEXT: v_mul_hi_u32 v3, v1, v3 2254; GFX10-NEXT: s_bfe_u32 s8, s8, 0x100000 2255; GFX10-NEXT: s_bfe_u32 s4, s4, 0x100000 2256; GFX10-NEXT: s_and_b32 s5, s5, 0xff 2257; GFX10-NEXT: s_lshl_b32 s8, s8, 16 2258; GFX10-NEXT: s_lshl_b32 s5, s5, s10 2259; GFX10-NEXT: s_or_b32 s4, s4, s8 2260; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v2 2261; GFX10-NEXT: s_and_b32 s8, s12, 0xff 2262; GFX10-NEXT: s_or_b32 s5, s11, s5 2263; GFX10-NEXT: s_bfe_u32 s8, s8, 0x100000 2264; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v3 2265; GFX10-NEXT: v_mul_hi_u32 v0, s4, v0 2266; GFX10-NEXT: s_bfe_u32 s5, s5, 0x100000 2267; GFX10-NEXT: s_lshl_b32 s8, s8, 16 2268; GFX10-NEXT: s_lshr_b32 s9, s2, 8 2269; GFX10-NEXT: s_or_b32 s5, s5, s8 2270; GFX10-NEXT: s_lshr_b32 s8, s2, 16 2271; GFX10-NEXT: v_mul_hi_u32 v1, s5, v1 2272; GFX10-NEXT: s_and_b32 s9, s9, 0xff 2273; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24 2274; GFX10-NEXT: s_lshr_b32 s11, s2, 24 2275; GFX10-NEXT: s_lshr_b32 s12, s3, 8 2276; GFX10-NEXT: s_and_b32 s2, s2, 0xff 2277; GFX10-NEXT: s_lshl_b32 s9, s9, s10 2278; GFX10-NEXT: s_and_b32 s8, s8, 0xff 2279; GFX10-NEXT: v_mul_lo_u32 v1, v1, 24 2280; GFX10-NEXT: s_and_b32 s3, s3, 0xff 2281; GFX10-NEXT: v_sub_nc_u32_e32 v0, s4, v0 2282; GFX10-NEXT: s_or_b32 s2, s2, s9 2283; GFX10-NEXT: s_bfe_u32 s4, s8, 0x100000 2284; GFX10-NEXT: s_lshl_b32 s3, s3, s10 2285; GFX10-NEXT: s_bfe_u32 s2, s2, 0x100000 2286; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v0 2287; GFX10-NEXT: v_sub_nc_u32_e32 v1, s5, v1 2288; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 2289; GFX10-NEXT: s_and_b32 s5, s12, 0xff 2290; GFX10-NEXT: s_lshl_b32 s4, s4, 16 2291; GFX10-NEXT: s_or_b32 s3, s11, s3 2292; GFX10-NEXT: s_bfe_u32 s5, s5, 0x100000 2293; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 2294; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v1 2295; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 2296; GFX10-NEXT: s_bfe_u32 s3, s3, 
0x100000 2297; GFX10-NEXT: s_lshl_b32 s5, s5, 16 2298; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v0 2299; GFX10-NEXT: s_or_b32 s2, s2, s4 2300; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo 2301; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 2302; GFX10-NEXT: s_bfe_u32 s0, s0, 0x100000 2303; GFX10-NEXT: s_bfe_u32 s6, s6, 0x100000 2304; GFX10-NEXT: s_or_b32 s3, s3, s5 2305; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v1 2306; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo 2307; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 2308; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 2309; GFX10-NEXT: s_bfe_u32 s7, s7, 0x100000 2310; GFX10-NEXT: s_lshl_b32 s4, s6, 17 2311; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v0 2312; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo 2313; GFX10-NEXT: v_and_b32_e32 v0, 0xffffff, v0 2314; GFX10-NEXT: s_lshl_b32 s0, s0, 1 2315; GFX10-NEXT: s_lshl_b32 s1, s1, 1 2316; GFX10-NEXT: v_and_b32_e32 v3, 0xffffff, v3 2317; GFX10-NEXT: v_sub_nc_u32_e32 v2, 23, v1 2318; GFX10-NEXT: v_and_b32_e32 v1, 0xffffff, v1 2319; GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, s2 2320; GFX10-NEXT: s_or_b32 s0, s4, s0 2321; GFX10-NEXT: s_lshl_b32 s2, s7, 17 2322; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2 2323; GFX10-NEXT: v_lshrrev_b32_e64 v1, v1, s3 2324; GFX10-NEXT: v_lshl_or_b32 v0, s0, v3, v0 2325; GFX10-NEXT: s_or_b32 s0, s2, s1 2326; GFX10-NEXT: v_lshl_or_b32 v1, s0, v2, v1 2327; GFX10-NEXT: s_mov_b32 s0, 8 2328; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2329; GFX10-NEXT: s_mov_b32 s0, 16 2330; GFX10-NEXT: v_and_b32_e32 v3, 0xff, v1 2331; GFX10-NEXT: v_bfe_u32 v4, v1, 8, 8 2332; GFX10-NEXT: v_bfe_u32 v1, v1, 16, 8 2333; GFX10-NEXT: v_and_or_b32 v2, v0, 0xff, v2 2334; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2335; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v3 2336; GFX10-NEXT: v_lshl_or_b32 v1, v1, 8, v4 2337; GFX10-NEXT: v_or3_b32 v0, v2, v0, v3 2338; 
GFX10-NEXT: v_readfirstlane_b32 s1, v1 2339; GFX10-NEXT: v_readfirstlane_b32 s0, v0 2340; GFX10-NEXT: ; return to shader part epilog 2341; 2342; GFX11-LABEL: s_fshr_v2i24: 2343; GFX11: ; %bb.0: 2344; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 2345; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v1, 24 2346; GFX11-NEXT: s_lshr_b32 s6, s0, 8 2347; GFX11-NEXT: s_bfe_u32 s9, 8, 0x100000 2348; GFX11-NEXT: s_and_b32 s6, s6, 0xff 2349; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0 2350; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v1 2351; GFX11-NEXT: s_lshr_b32 s7, s0, 16 2352; GFX11-NEXT: s_lshr_b32 s8, s0, 24 2353; GFX11-NEXT: s_and_b32 s0, s0, 0xff 2354; GFX11-NEXT: s_lshl_b32 s6, s6, s9 2355; GFX11-NEXT: s_lshr_b32 s10, s1, 8 2356; GFX11-NEXT: s_or_b32 s0, s0, s6 2357; GFX11-NEXT: s_and_b32 s6, s7, 0xff 2358; GFX11-NEXT: s_and_b32 s7, s10, 0xff 2359; GFX11-NEXT: s_waitcnt_depctr 0xfff 2360; GFX11-NEXT: v_dual_mul_f32 v0, 0x4f7ffffe, v0 :: v_dual_mul_f32 v1, 0x4f7ffffe, v1 2361; GFX11-NEXT: s_lshr_b32 s10, s4, 8 2362; GFX11-NEXT: s_lshr_b32 s11, s4, 16 2363; GFX11-NEXT: s_and_b32 s10, s10, 0xff 2364; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2365; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0 2366; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v1 2367; GFX11-NEXT: s_and_b32 s12, s4, 0xff 2368; GFX11-NEXT: s_lshl_b32 s10, s10, s9 2369; GFX11-NEXT: s_and_b32 s11, s11, 0xff 2370; GFX11-NEXT: v_mul_lo_u32 v2, 0xffffffe8, v0 2371; GFX11-NEXT: s_or_b32 s10, s12, s10 2372; GFX11-NEXT: v_mul_lo_u32 v3, 0xffffffe8, v1 2373; GFX11-NEXT: s_bfe_u32 s11, s11, 0x100000 2374; GFX11-NEXT: s_bfe_u32 s10, s10, 0x100000 2375; GFX11-NEXT: s_lshl_b32 s11, s11, 16 2376; GFX11-NEXT: s_lshr_b32 s12, s5, 8 2377; GFX11-NEXT: s_or_b32 s10, s10, s11 2378; GFX11-NEXT: v_mul_hi_u32 v2, v0, v2 2379; GFX11-NEXT: s_and_b32 s5, s5, 0xff 2380; GFX11-NEXT: s_lshr_b32 s4, s4, 24 2381; GFX11-NEXT: s_lshl_b32 s5, s5, s9 2382; GFX11-NEXT: s_and_b32 s11, s12, 0xff 2383; GFX11-NEXT: s_or_b32 s4, s4, s5 2384; GFX11-NEXT: s_bfe_u32 s5, s11, 0x100000 2385; 
GFX11-NEXT: s_bfe_u32 s4, s4, 0x100000 2386; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v2 2387; GFX11-NEXT: v_mul_hi_u32 v2, v1, v3 2388; GFX11-NEXT: s_lshl_b32 s5, s5, 16 2389; GFX11-NEXT: s_and_b32 s1, s1, 0xff 2390; GFX11-NEXT: s_or_b32 s4, s4, s5 2391; GFX11-NEXT: v_mul_hi_u32 v0, s10, v0 2392; GFX11-NEXT: s_lshl_b32 s1, s1, s9 2393; GFX11-NEXT: s_lshr_b32 s11, s2, 16 2394; GFX11-NEXT: s_or_b32 s1, s8, s1 2395; GFX11-NEXT: v_add_nc_u32_e32 v1, v1, v2 2396; GFX11-NEXT: s_lshr_b32 s8, s2, 8 2397; GFX11-NEXT: s_lshr_b32 s5, s2, 24 2398; GFX11-NEXT: s_and_b32 s8, s8, 0xff 2399; GFX11-NEXT: v_mul_lo_u32 v0, v0, 24 2400; GFX11-NEXT: v_mul_hi_u32 v1, s4, v1 2401; GFX11-NEXT: s_and_b32 s2, s2, 0xff 2402; GFX11-NEXT: s_lshl_b32 s8, s8, s9 2403; GFX11-NEXT: s_bfe_u32 s0, s0, 0x100000 2404; GFX11-NEXT: s_or_b32 s2, s2, s8 2405; GFX11-NEXT: s_and_b32 s8, s11, 0xff 2406; GFX11-NEXT: s_bfe_u32 s2, s2, 0x100000 2407; GFX11-NEXT: v_sub_nc_u32_e32 v0, s10, v0 2408; GFX11-NEXT: v_mul_lo_u32 v1, v1, 24 2409; GFX11-NEXT: s_bfe_u32 s8, s8, 0x100000 2410; GFX11-NEXT: s_lshr_b32 s10, s3, 8 2411; GFX11-NEXT: s_and_b32 s3, s3, 0xff 2412; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v0 2413; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 2414; GFX11-NEXT: s_lshl_b32 s8, s8, 16 2415; GFX11-NEXT: s_lshl_b32 s3, s3, s9 2416; GFX11-NEXT: v_sub_nc_u32_e32 v1, s4, v1 2417; GFX11-NEXT: s_and_b32 s4, s10, 0xff 2418; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 2419; GFX11-NEXT: s_or_b32 s2, s2, s8 2420; GFX11-NEXT: s_bfe_u32 s6, s6, 0x100000 2421; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v1 2422; GFX11-NEXT: s_or_b32 s3, s5, s3 2423; GFX11-NEXT: v_subrev_nc_u32_e32 v2, 24, v0 2424; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 2425; GFX11-NEXT: s_bfe_u32 s4, s4, 0x100000 2426; GFX11-NEXT: s_bfe_u32 s3, s3, 0x100000 2427; GFX11-NEXT: s_lshl_b32 s4, s4, 16 2428; GFX11-NEXT: s_lshl_b32 s5, s6, 17 2429; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 2430; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 2431; 
GFX11-NEXT: s_lshl_b32 s0, s0, 1 2432; GFX11-NEXT: s_bfe_u32 s1, s1, 0x100000 2433; GFX11-NEXT: s_or_b32 s0, s5, s0 2434; GFX11-NEXT: s_bfe_u32 s7, s7, 0x100000 2435; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo 2436; GFX11-NEXT: s_lshl_b32 s1, s1, 1 2437; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 2438; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 24, v1 2439; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 2440; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo 2441; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_2) 2442; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v1 2443; GFX11-NEXT: v_and_b32_e32 v1, 0xffffff, v1 2444; GFX11-NEXT: v_sub_nc_u32_e32 v2, 23, v0 2445; GFX11-NEXT: v_and_b32_e32 v0, 0xffffff, v0 2446; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v2 2447; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) 2448; GFX11-NEXT: v_lshrrev_b32_e64 v0, v0, s2 2449; GFX11-NEXT: s_or_b32 s2, s3, s4 2450; GFX11-NEXT: v_lshrrev_b32_e64 v1, v1, s2 2451; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) 2452; GFX11-NEXT: v_lshl_or_b32 v0, s0, v2, v0 2453; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v3 2454; GFX11-NEXT: s_lshl_b32 s0, s7, 17 2455; GFX11-NEXT: s_or_b32 s0, s0, s1 2456; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2457; GFX11-NEXT: v_bfe_u32 v3, v0, 8, 8 2458; GFX11-NEXT: v_lshl_or_b32 v1, s0, v2, v1 2459; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) 2460; GFX11-NEXT: v_lshlrev_b32_e32 v2, 8, v3 2461; GFX11-NEXT: v_bfe_u32 v3, v0, 16, 8 2462; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v1 2463; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 2464; GFX11-NEXT: v_and_or_b32 v0, v0, 0xff, v2 2465; GFX11-NEXT: v_lshlrev_b32_e32 v2, 16, v3 2466; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3) 
2467; GFX11-NEXT: v_lshlrev_b32_e32 v3, 24, v4 2468; GFX11-NEXT: v_bfe_u32 v4, v1, 8, 8 2469; GFX11-NEXT: v_bfe_u32 v1, v1, 16, 8 2470; GFX11-NEXT: v_or3_b32 v0, v0, v2, v3 2471; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2472; GFX11-NEXT: v_lshl_or_b32 v1, v1, 8, v4 2473; GFX11-NEXT: v_readfirstlane_b32 s0, v0 2474; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 2475; GFX11-NEXT: v_readfirstlane_b32 s1, v1 2476; GFX11-NEXT: ; return to shader part epilog 2477 %lhs = bitcast i48 %lhs.arg to <2 x i24> 2478 %rhs = bitcast i48 %rhs.arg to <2 x i24> 2479 %amt = bitcast i48 %amt.arg to <2 x i24> 2480 %result = call <2 x i24> @llvm.fshr.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) 2481 %cast.result = bitcast <2 x i24> %result to i48 2482 ret i48 %cast.result 2483} 2484; v_fshr_v2i24 - funnel shift right on <2 x i24> with a variable amount per element, using regular (non-inreg) arguments. The expected code reduces each 24-bit-masked amount modulo 24 with a reciprocal-based sequence (v_rcp_iflag_f32, v_mul_hi_u32, v_mul_lo_u32 by 24, then two compare/select fixups), and finally ORs lhs<<1 shifted left by (23 - amt) with the masked rhs shifted right by amt. 2485define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) { 2486; GFX6-LABEL: v_fshr_v2i24: 2487; GFX6: ; %bb.0: 2488; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2489; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 2490; GFX6-NEXT: v_rcp_iflag_f32_e32 v6, v6 2491; GFX6-NEXT: v_mov_b32_e32 v7, 0xffffffe8 2492; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2493; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v9, 24 2494; GFX6-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 2495; GFX6-NEXT: v_cvt_u32_f32_e32 v6, v6 2496; GFX6-NEXT: v_and_b32_e32 v5, 0xffffff, v5 2497; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 2498; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2 2499; GFX6-NEXT: v_mul_lo_u32 v8, v7, v6 2500; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1 2501; GFX6-NEXT: v_and_b32_e32 v3, 0xffffff, v3 2502; GFX6-NEXT: v_mul_hi_u32 v8, v6, v8 2503; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v8 2504; GFX6-NEXT: v_mul_hi_u32 v6, v4, v6 2505; GFX6-NEXT: v_rcp_iflag_f32_e32 v8, v9 2506; GFX6-NEXT: v_mul_lo_u32 v6, v6, 24 2507; GFX6-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8 2508; GFX6-NEXT: v_cvt_u32_f32_e32 v8, v8 2509; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v6 2510; GFX6-NEXT: v_subrev_i32_e32
v6, vcc, 24, v4 2511; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 2512; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc 2513; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 24, v4 2514; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 2515; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc 2516; GFX6-NEXT: v_mul_lo_u32 v6, v7, v8 2517; GFX6-NEXT: v_sub_i32_e32 v7, vcc, 23, v4 2518; GFX6-NEXT: v_and_b32_e32 v7, 0xffffff, v7 2519; GFX6-NEXT: v_mul_hi_u32 v6, v8, v6 2520; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2521; GFX6-NEXT: v_lshlrev_b32_e32 v0, v7, v0 2522; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2 2523; GFX6-NEXT: v_add_i32_e32 v6, vcc, v8, v6 2524; GFX6-NEXT: v_mul_hi_u32 v6, v5, v6 2525; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 2526; GFX6-NEXT: v_mul_lo_u32 v6, v6, 24 2527; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v5, v6 2528; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2 2529; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 2530; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2531; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2 2532; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 2533; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2534; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 23, v2 2535; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2536; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2 2537; GFX6-NEXT: v_lshlrev_b32_e32 v1, v4, v1 2538; GFX6-NEXT: v_lshrrev_b32_e32 v2, v2, v3 2539; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 2540; GFX6-NEXT: s_setpc_b64 s[30:31] 2541; 2542; GFX8-LABEL: v_fshr_v2i24: 2543; GFX8: ; %bb.0: 2544; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2545; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 2546; GFX8-NEXT: v_rcp_iflag_f32_e32 v6, v6 2547; GFX8-NEXT: v_mov_b32_e32 v7, 0xffffffe8 2548; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2549; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v9, 24 2550; GFX8-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 2551; GFX8-NEXT: v_cvt_u32_f32_e32 v6, v6 2552; GFX8-NEXT: v_and_b32_e32 v5, 0xffffff, v5 2553; GFX8-NEXT: v_lshlrev_b32_e32 v0, 1, v0 2554; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2 2555; GFX8-NEXT: 
v_mul_lo_u32 v8, v7, v6 2556; GFX8-NEXT: v_lshlrev_b32_e32 v1, 1, v1 2557; GFX8-NEXT: v_and_b32_e32 v3, 0xffffff, v3 2558; GFX8-NEXT: v_mul_hi_u32 v8, v6, v8 2559; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v8 2560; GFX8-NEXT: v_mul_hi_u32 v6, v4, v6 2561; GFX8-NEXT: v_rcp_iflag_f32_e32 v8, v9 2562; GFX8-NEXT: v_mul_lo_u32 v6, v6, 24 2563; GFX8-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8 2564; GFX8-NEXT: v_cvt_u32_f32_e32 v8, v8 2565; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v6 2566; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 24, v4 2567; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 2568; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc 2569; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 24, v4 2570; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 2571; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc 2572; GFX8-NEXT: v_mul_lo_u32 v6, v7, v8 2573; GFX8-NEXT: v_sub_u32_e32 v7, vcc, 23, v4 2574; GFX8-NEXT: v_and_b32_e32 v7, 0xffffff, v7 2575; GFX8-NEXT: v_mul_hi_u32 v6, v8, v6 2576; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2577; GFX8-NEXT: v_lshlrev_b32_e32 v0, v7, v0 2578; GFX8-NEXT: v_lshrrev_b32_e32 v2, v4, v2 2579; GFX8-NEXT: v_add_u32_e32 v6, vcc, v8, v6 2580; GFX8-NEXT: v_mul_hi_u32 v6, v5, v6 2581; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 2582; GFX8-NEXT: v_mul_lo_u32 v6, v6, 24 2583; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v5, v6 2584; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 24, v2 2585; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 2586; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2587; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 24, v2 2588; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 2589; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2590; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 23, v2 2591; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2592; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2 2593; GFX8-NEXT: v_lshlrev_b32_e32 v1, v4, v1 2594; GFX8-NEXT: v_lshrrev_b32_e32 v2, v2, v3 2595; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 2596; GFX8-NEXT: s_setpc_b64 s[30:31] 2597; 2598; GFX9-LABEL: v_fshr_v2i24: 2599; GFX9: ; %bb.0: 2600; GFX9-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 2601; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 2602; GFX9-NEXT: v_rcp_iflag_f32_e32 v6, v6 2603; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v9, 24 2604; GFX9-NEXT: v_rcp_iflag_f32_e32 v9, v9 2605; GFX9-NEXT: v_mov_b32_e32 v7, 0xffffffe8 2606; GFX9-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 2607; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v6 2608; GFX9-NEXT: v_mul_f32_e32 v9, 0x4f7ffffe, v9 2609; GFX9-NEXT: v_cvt_u32_f32_e32 v9, v9 2610; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2611; GFX9-NEXT: v_mul_lo_u32 v8, v7, v6 2612; GFX9-NEXT: v_and_b32_e32 v5, 0xffffff, v5 2613; GFX9-NEXT: v_mul_lo_u32 v7, v7, v9 2614; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2 2615; GFX9-NEXT: v_mul_hi_u32 v8, v6, v8 2616; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0 2617; GFX9-NEXT: v_mul_hi_u32 v7, v9, v7 2618; GFX9-NEXT: v_and_b32_e32 v3, 0xffffff, v3 2619; GFX9-NEXT: v_add_u32_e32 v6, v6, v8 2620; GFX9-NEXT: v_mul_hi_u32 v6, v4, v6 2621; GFX9-NEXT: v_add_u32_e32 v7, v9, v7 2622; GFX9-NEXT: v_mul_hi_u32 v7, v5, v7 2623; GFX9-NEXT: v_lshlrev_b32_e32 v1, 1, v1 2624; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 2625; GFX9-NEXT: v_mul_lo_u32 v7, v7, 24 2626; GFX9-NEXT: v_sub_u32_e32 v4, v4, v6 2627; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4 2628; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 2629; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc 2630; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4 2631; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 2632; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc 2633; GFX9-NEXT: v_sub_u32_e32 v6, 23, v4 2634; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2635; GFX9-NEXT: v_and_b32_e32 v6, 0xffffff, v6 2636; GFX9-NEXT: v_lshrrev_b32_e32 v2, v4, v2 2637; GFX9-NEXT: v_lshl_or_b32 v0, v0, v6, v2 2638; GFX9-NEXT: v_sub_u32_e32 v2, v5, v7 2639; GFX9-NEXT: v_subrev_u32_e32 v4, 24, v2 2640; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 2641; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2642; GFX9-NEXT: v_subrev_u32_e32 v4, 24, v2 2643; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 2644; GFX9-NEXT: v_cndmask_b32_e32 
v2, v2, v4, vcc 2645; GFX9-NEXT: v_sub_u32_e32 v4, 23, v2 2646; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2 2647; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2648; GFX9-NEXT: v_lshrrev_b32_e32 v2, v2, v3 2649; GFX9-NEXT: v_lshl_or_b32 v1, v1, v4, v2 2650; GFX9-NEXT: s_setpc_b64 s[30:31] 2651; 2652; GFX10-LABEL: v_fshr_v2i24: 2653; GFX10: ; %bb.0: 2654; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2655; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2656; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 2657; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v7, 24 2658; GFX10-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2659; GFX10-NEXT: v_and_b32_e32 v5, 0xffffff, v5 2660; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2 2661; GFX10-NEXT: v_rcp_iflag_f32_e32 v6, v6 2662; GFX10-NEXT: v_rcp_iflag_f32_e32 v7, v7 2663; GFX10-NEXT: v_and_b32_e32 v3, 0xffffff, v3 2664; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0 2665; GFX10-NEXT: v_lshlrev_b32_e32 v1, 1, v1 2666; GFX10-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 2667; GFX10-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7 2668; GFX10-NEXT: v_cvt_u32_f32_e32 v6, v6 2669; GFX10-NEXT: v_cvt_u32_f32_e32 v7, v7 2670; GFX10-NEXT: v_mul_lo_u32 v8, 0xffffffe8, v6 2671; GFX10-NEXT: v_mul_lo_u32 v9, 0xffffffe8, v7 2672; GFX10-NEXT: v_mul_hi_u32 v8, v6, v8 2673; GFX10-NEXT: v_mul_hi_u32 v9, v7, v9 2674; GFX10-NEXT: v_add_nc_u32_e32 v6, v6, v8 2675; GFX10-NEXT: v_add_nc_u32_e32 v7, v7, v9 2676; GFX10-NEXT: v_mul_hi_u32 v6, v4, v6 2677; GFX10-NEXT: v_mul_hi_u32 v7, v5, v7 2678; GFX10-NEXT: v_mul_lo_u32 v6, v6, 24 2679; GFX10-NEXT: v_mul_lo_u32 v7, v7, 24 2680; GFX10-NEXT: v_sub_nc_u32_e32 v4, v4, v6 2681; GFX10-NEXT: v_sub_nc_u32_e32 v5, v5, v7 2682; GFX10-NEXT: v_subrev_nc_u32_e32 v6, 24, v4 2683; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4 2684; GFX10-NEXT: v_subrev_nc_u32_e32 v7, 24, v5 2685; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo 2686; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5 2687; GFX10-NEXT: v_subrev_nc_u32_e32 v6, 24, v4 2688; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo 
2689; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4 2690; GFX10-NEXT: v_subrev_nc_u32_e32 v7, 24, v5 2691; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo 2692; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5 2693; GFX10-NEXT: v_sub_nc_u32_e32 v6, 23, v4 2694; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo 2695; GFX10-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2696; GFX10-NEXT: v_and_b32_e32 v6, 0xffffff, v6 2697; GFX10-NEXT: v_sub_nc_u32_e32 v7, 23, v5 2698; GFX10-NEXT: v_and_b32_e32 v5, 0xffffff, v5 2699; GFX10-NEXT: v_lshrrev_b32_e32 v2, v4, v2 2700; GFX10-NEXT: v_and_b32_e32 v4, 0xffffff, v7 2701; GFX10-NEXT: v_lshrrev_b32_e32 v3, v5, v3 2702; GFX10-NEXT: v_lshl_or_b32 v0, v0, v6, v2 2703; GFX10-NEXT: v_lshl_or_b32 v1, v1, v4, v3 2704; GFX10-NEXT: s_setpc_b64 s[30:31] 2705; 2706; GFX11-LABEL: v_fshr_v2i24: 2707; GFX11: ; %bb.0: 2708; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2709; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2710; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 2711; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v7, 24 2712; GFX11-NEXT: v_and_b32_e32 v5, 0xffffff, v5 2713; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v2 2714; GFX11-NEXT: v_and_b32_e32 v3, 0xffffff, v3 2715; GFX11-NEXT: v_rcp_iflag_f32_e32 v6, v6 2716; GFX11-NEXT: v_rcp_iflag_f32_e32 v7, v7 2717; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0 2718; GFX11-NEXT: s_waitcnt_depctr 0xfff 2719; GFX11-NEXT: v_dual_mul_f32 v6, 0x4f7ffffe, v6 :: v_dual_lshlrev_b32 v1, 1, v1 2720; GFX11-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7 2721; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2722; GFX11-NEXT: v_cvt_u32_f32_e32 v6, v6 2723; GFX11-NEXT: v_cvt_u32_f32_e32 v7, v7 2724; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2725; GFX11-NEXT: v_mul_lo_u32 v8, 0xffffffe8, v6 2726; GFX11-NEXT: v_mul_lo_u32 v9, 0xffffffe8, v7 2727; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2728; GFX11-NEXT: v_mul_hi_u32 v8, v6, v8 2729; 
GFX11-NEXT: v_mul_hi_u32 v9, v7, v9 2730; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2731; GFX11-NEXT: v_add_nc_u32_e32 v6, v6, v8 2732; GFX11-NEXT: v_add_nc_u32_e32 v7, v7, v9 2733; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2734; GFX11-NEXT: v_mul_hi_u32 v7, v5, v7 2735; GFX11-NEXT: v_mul_lo_u32 v7, v7, 24 2736; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2737; GFX11-NEXT: v_sub_nc_u32_e32 v5, v5, v7 2738; GFX11-NEXT: v_subrev_nc_u32_e32 v7, 24, v5 2739; GFX11-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2740; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2741; GFX11-NEXT: v_mul_hi_u32 v6, v4, v6 2742; GFX11-NEXT: v_mul_lo_u32 v6, v6, 24 2743; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2744; GFX11-NEXT: v_sub_nc_u32_e32 v4, v4, v6 2745; GFX11-NEXT: v_subrev_nc_u32_e32 v6, 24, v4 2746; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4 2747; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) 2748; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo 2749; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5 2750; GFX11-NEXT: v_subrev_nc_u32_e32 v6, 24, v4 2751; GFX11-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo 2752; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4 2753; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) 2754; GFX11-NEXT: v_subrev_nc_u32_e32 v7, 24, v5 2755; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo 2756; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5 2757; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) 2758; GFX11-NEXT: v_sub_nc_u32_e32 v6, 23, v4 2759; GFX11-NEXT: v_dual_cndmask_b32 v5, v5, v7 :: v_dual_and_b32 v4, 0xffffff, v4 2760; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2761; GFX11-NEXT: v_and_b32_e32 v6, 0xffffff, v6 2762; GFX11-NEXT: 
v_sub_nc_u32_e32 v7, 23, v5 2763; GFX11-NEXT: v_and_b32_e32 v5, 0xffffff, v5 2764; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) 2765; GFX11-NEXT: v_lshrrev_b32_e32 v2, v4, v2 2766; GFX11-NEXT: v_and_b32_e32 v4, 0xffffff, v7 2767; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 2768; GFX11-NEXT: v_lshrrev_b32_e32 v3, v5, v3 2769; GFX11-NEXT: v_lshl_or_b32 v0, v0, v6, v2 2770; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 2771; GFX11-NEXT: v_lshl_or_b32 v1, v1, v4, v3 2772; GFX11-NEXT: s_setpc_b64 s[30:31] 2773 %result = call <2 x i24> @llvm.fshr.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) 2774 ret <2 x i24> %result 2775} 2776; s_fshr_i32 - i32 funnel shift right in an amdgpu_ps function with lhs, rhs and amt all passed inreg. The checks expect a VALU v_alignbit_b32 whose result is moved back to s0 with v_readfirstlane_b32; the tahiti/fiji/gfx900 checks first copy s1 and s2 into VGPRs, while the gfx1010/gfx1100 checks copy only s2. 2777define amdgpu_ps i32 @s_fshr_i32(i32 inreg %lhs, i32 inreg %rhs, i32 inreg %amt) { 2778; GFX6-LABEL: s_fshr_i32: 2779; GFX6: ; %bb.0: 2780; GFX6-NEXT: v_mov_b32_e32 v0, s1 2781; GFX6-NEXT: v_mov_b32_e32 v1, s2 2782; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, v1 2783; GFX6-NEXT: v_readfirstlane_b32 s0, v0 2784; GFX6-NEXT: ; return to shader part epilog 2785; 2786; GFX8-LABEL: s_fshr_i32: 2787; GFX8: ; %bb.0: 2788; GFX8-NEXT: v_mov_b32_e32 v0, s1 2789; GFX8-NEXT: v_mov_b32_e32 v1, s2 2790; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, v1 2791; GFX8-NEXT: v_readfirstlane_b32 s0, v0 2792; GFX8-NEXT: ; return to shader part epilog 2793; 2794; GFX9-LABEL: s_fshr_i32: 2795; GFX9: ; %bb.0: 2796; GFX9-NEXT: v_mov_b32_e32 v0, s1 2797; GFX9-NEXT: v_mov_b32_e32 v1, s2 2798; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, v1 2799; GFX9-NEXT: v_readfirstlane_b32 s0, v0 2800; GFX9-NEXT: ; return to shader part epilog 2801; 2802; GFX10-LABEL: s_fshr_i32: 2803; GFX10: ; %bb.0: 2804; GFX10-NEXT: v_mov_b32_e32 v0, s2 2805; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, v0 2806; GFX10-NEXT: v_readfirstlane_b32 s0, v0 2807; GFX10-NEXT: ; return to shader part epilog 2808; 2809; GFX11-LABEL: s_fshr_i32: 2810; GFX11: ; %bb.0: 2811; GFX11-NEXT: v_mov_b32_e32 v0, s2 2812; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
| instskip(NEXT) | instid1(VALU_DEP_1) 2813; GFX11-NEXT: v_alignbit_b32 v0, s0, s1, v0 2814; GFX11-NEXT: v_readfirstlane_b32 s0, v0 2815; GFX11-NEXT: ; return to shader part epilog 2816 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt) 2817 ret i32 %result 2818} 2819; s_fshr_i32_5 - i32 funnel shift right of two inreg operands by the constant 5. The checks expect the 5 as an immediate operand of v_alignbit_b32 and the result returned in s0 through v_readfirstlane_b32. 2820define amdgpu_ps i32 @s_fshr_i32_5(i32 inreg %lhs, i32 inreg %rhs) { 2821; GFX6-LABEL: s_fshr_i32_5: 2822; GFX6: ; %bb.0: 2823; GFX6-NEXT: v_mov_b32_e32 v0, s1 2824; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, 5 2825; GFX6-NEXT: v_readfirstlane_b32 s0, v0 2826; GFX6-NEXT: ; return to shader part epilog 2827; 2828; GFX8-LABEL: s_fshr_i32_5: 2829; GFX8: ; %bb.0: 2830; GFX8-NEXT: v_mov_b32_e32 v0, s1 2831; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, 5 2832; GFX8-NEXT: v_readfirstlane_b32 s0, v0 2833; GFX8-NEXT: ; return to shader part epilog 2834; 2835; GFX9-LABEL: s_fshr_i32_5: 2836; GFX9: ; %bb.0: 2837; GFX9-NEXT: v_mov_b32_e32 v0, s1 2838; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, 5 2839; GFX9-NEXT: v_readfirstlane_b32 s0, v0 2840; GFX9-NEXT: ; return to shader part epilog 2841; 2842; GFX10-LABEL: s_fshr_i32_5: 2843; GFX10: ; %bb.0: 2844; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, 5 2845; GFX10-NEXT: v_readfirstlane_b32 s0, v0 2846; GFX10-NEXT: ; return to shader part epilog 2847; 2848; GFX11-LABEL: s_fshr_i32_5: 2849; GFX11: ; %bb.0: 2850; GFX11-NEXT: v_alignbit_b32 v0, s0, s1, 5 2851; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2852; GFX11-NEXT: v_readfirstlane_b32 s0, v0 2853; GFX11-NEXT: ; return to shader part epilog 2854 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 5) 2855 ret i32 %result 2856} 2857; s_fshr_i32_8 - i32 funnel shift right of two inreg operands by the constant 8. The checks expect 8 as the immediate operand of v_alignbit_b32, followed by v_readfirstlane_b32 into s0. 2858define amdgpu_ps i32 @s_fshr_i32_8(i32 inreg %lhs, i32 inreg %rhs) { 2859; GFX6-LABEL: s_fshr_i32_8: 2860; GFX6: ; %bb.0: 2861; GFX6-NEXT: v_mov_b32_e32 v0, s1 2862; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, 8 2863; GFX6-NEXT: v_readfirstlane_b32 s0, v0 2864; GFX6-NEXT: ; return to shader part epilog 2865; 2866; GFX8-LABEL: s_fshr_i32_8: 2867; GFX8: ; %bb.0: 2868; GFX8-NEXT: v_mov_b32_e32
v0, s1 2869; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, 8 2870; GFX8-NEXT: v_readfirstlane_b32 s0, v0 2871; GFX8-NEXT: ; return to shader part epilog 2872; 2873; GFX9-LABEL: s_fshr_i32_8: 2874; GFX9: ; %bb.0: 2875; GFX9-NEXT: v_mov_b32_e32 v0, s1 2876; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, 8 2877; GFX9-NEXT: v_readfirstlane_b32 s0, v0 2878; GFX9-NEXT: ; return to shader part epilog 2879; 2880; GFX10-LABEL: s_fshr_i32_8: 2881; GFX10: ; %bb.0: 2882; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, 8 2883; GFX10-NEXT: v_readfirstlane_b32 s0, v0 2884; GFX10-NEXT: ; return to shader part epilog 2885; 2886; GFX11-LABEL: s_fshr_i32_8: 2887; GFX11: ; %bb.0: 2888; GFX11-NEXT: v_alignbit_b32 v0, s0, s1, 8 2889; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2890; GFX11-NEXT: v_readfirstlane_b32 s0, v0 2891; GFX11-NEXT: ; return to shader part epilog 2892 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 8) 2893 ret i32 %result 2894} 2895; v_fshr_i32 - i32 funnel shift right with all three operands as regular (non-inreg) arguments. Every checked target is expected to emit a single v_alignbit_b32 on v0, v1, v2 between the entry waits and the s_setpc_b64 return. 2896define i32 @v_fshr_i32(i32 %lhs, i32 %rhs, i32 %amt) { 2897; GFX6-LABEL: v_fshr_i32: 2898; GFX6: ; %bb.0: 2899; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2900; GFX6-NEXT: v_alignbit_b32 v0, v0, v1, v2 2901; GFX6-NEXT: s_setpc_b64 s[30:31] 2902; 2903; GFX8-LABEL: v_fshr_i32: 2904; GFX8: ; %bb.0: 2905; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2906; GFX8-NEXT: v_alignbit_b32 v0, v0, v1, v2 2907; GFX8-NEXT: s_setpc_b64 s[30:31] 2908; 2909; GFX9-LABEL: v_fshr_i32: 2910; GFX9: ; %bb.0: 2911; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2912; GFX9-NEXT: v_alignbit_b32 v0, v0, v1, v2 2913; GFX9-NEXT: s_setpc_b64 s[30:31] 2914; 2915; GFX10-LABEL: v_fshr_i32: 2916; GFX10: ; %bb.0: 2917; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2918; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2919; GFX10-NEXT: v_alignbit_b32 v0, v0, v1, v2 2920; GFX10-NEXT: s_setpc_b64 s[30:31] 2921; 2922; GFX11-LABEL: v_fshr_i32: 2923; GFX11: ; %bb.0: 2924; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2925; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2926;
GFX11-NEXT: v_alignbit_b32 v0, v0, v1, v2 2927; GFX11-NEXT: s_setpc_b64 s[30:31] 2928 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt) 2929 ret i32 %result 2930} 2931; v_fshr_i32_5 - i32 funnel shift right of two regular (non-inreg) arguments by the constant 5. Every checked target is expected to emit one v_alignbit_b32 with 5 as an immediate operand. 2932define i32 @v_fshr_i32_5(i32 %lhs, i32 %rhs) { 2933; GFX6-LABEL: v_fshr_i32_5: 2934; GFX6: ; %bb.0: 2935; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2936; GFX6-NEXT: v_alignbit_b32 v0, v0, v1, 5 2937; GFX6-NEXT: s_setpc_b64 s[30:31] 2938; 2939; GFX8-LABEL: v_fshr_i32_5: 2940; GFX8: ; %bb.0: 2941; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2942; GFX8-NEXT: v_alignbit_b32 v0, v0, v1, 5 2943; GFX8-NEXT: s_setpc_b64 s[30:31] 2944; 2945; GFX9-LABEL: v_fshr_i32_5: 2946; GFX9: ; %bb.0: 2947; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2948; GFX9-NEXT: v_alignbit_b32 v0, v0, v1, 5 2949; GFX9-NEXT: s_setpc_b64 s[30:31] 2950; 2951; GFX10-LABEL: v_fshr_i32_5: 2952; GFX10: ; %bb.0: 2953; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2954; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2955; GFX10-NEXT: v_alignbit_b32 v0, v0, v1, 5 2956; GFX10-NEXT: s_setpc_b64 s[30:31] 2957; 2958; GFX11-LABEL: v_fshr_i32_5: 2959; GFX11: ; %bb.0: 2960; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2961; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2962; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, 5 2963; GFX11-NEXT: s_setpc_b64 s[30:31] 2964 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 5) 2965 ret i32 %result 2966} 2967; v_fshr_i32_8 - i32 funnel shift right of two regular (non-inreg) arguments by the constant 8. Every checked target is expected to emit one v_alignbit_b32 with 8 as an immediate operand. 2968define i32 @v_fshr_i32_8(i32 %lhs, i32 %rhs) { 2969; GFX6-LABEL: v_fshr_i32_8: 2970; GFX6: ; %bb.0: 2971; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2972; GFX6-NEXT: v_alignbit_b32 v0, v0, v1, 8 2973; GFX6-NEXT: s_setpc_b64 s[30:31] 2974; 2975; GFX8-LABEL: v_fshr_i32_8: 2976; GFX8: ; %bb.0: 2977; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2978; GFX8-NEXT: v_alignbit_b32 v0, v0, v1, 8 2979; GFX8-NEXT: s_setpc_b64 s[30:31] 2980; 2981; GFX9-LABEL: v_fshr_i32_8: 2982; GFX9: ; %bb.0: 2983; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2984;
GFX9-NEXT: v_alignbit_b32 v0, v0, v1, 8 2985; GFX9-NEXT: s_setpc_b64 s[30:31] 2986; 2987; GFX10-LABEL: v_fshr_i32_8: 2988; GFX10: ; %bb.0: 2989; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2990; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2991; GFX10-NEXT: v_alignbit_b32 v0, v0, v1, 8 2992; GFX10-NEXT: s_setpc_b64 s[30:31] 2993; 2994; GFX11-LABEL: v_fshr_i32_8: 2995; GFX11: ; %bb.0: 2996; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2997; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 2998; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, 8 2999; GFX11-NEXT: s_setpc_b64 s[30:31] 3000 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 8) 3001 ret i32 %result 3002} 3003; v_fshr_i32_ssv - mixed operand placement: lhs and rhs are inreg, amt is a regular argument (v0 in the checks). The i32 result is bitcast to float and the checks leave it in v0 with no v_readfirstlane_b32. The tahiti/fiji/gfx900 checks copy s1 into v1 first; the gfx1010/gfx1100 checks pass s0 and s1 directly to v_alignbit_b32. 3004define amdgpu_ps float @v_fshr_i32_ssv(i32 inreg %lhs, i32 inreg %rhs, i32 %amt) { 3005; GFX6-LABEL: v_fshr_i32_ssv: 3006; GFX6: ; %bb.0: 3007; GFX6-NEXT: v_mov_b32_e32 v1, s1 3008; GFX6-NEXT: v_alignbit_b32 v0, s0, v1, v0 3009; GFX6-NEXT: ; return to shader part epilog 3010; 3011; GFX8-LABEL: v_fshr_i32_ssv: 3012; GFX8: ; %bb.0: 3013; GFX8-NEXT: v_mov_b32_e32 v1, s1 3014; GFX8-NEXT: v_alignbit_b32 v0, s0, v1, v0 3015; GFX8-NEXT: ; return to shader part epilog 3016; 3017; GFX9-LABEL: v_fshr_i32_ssv: 3018; GFX9: ; %bb.0: 3019; GFX9-NEXT: v_mov_b32_e32 v1, s1 3020; GFX9-NEXT: v_alignbit_b32 v0, s0, v1, v0 3021; GFX9-NEXT: ; return to shader part epilog 3022; 3023; GFX10-LABEL: v_fshr_i32_ssv: 3024; GFX10: ; %bb.0: 3025; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, v0 3026; GFX10-NEXT: ; return to shader part epilog 3027; 3028; GFX11-LABEL: v_fshr_i32_ssv: 3029; GFX11: ; %bb.0: 3030; GFX11-NEXT: v_alignbit_b32 v0, s0, s1, v0 3031; GFX11-NEXT: ; return to shader part epilog 3032 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt) 3033 %cast.result = bitcast i32 %result to float 3034 ret float %cast.result 3035} 3036; v_fshr_i32_svs - mixed operand placement: lhs and amt are inreg, rhs is a regular argument (v0 in the checks). The i32 result is bitcast to float and the checks leave it in v0. The tahiti/fiji/gfx900 checks copy the amount from s1 into v1 first; the gfx1010/gfx1100 checks pass s0 and s1 directly to v_alignbit_b32. 3037define amdgpu_ps float @v_fshr_i32_svs(i32 inreg %lhs, i32 %rhs, i32 inreg %amt) { 3038; GFX6-LABEL: v_fshr_i32_svs: 3039; GFX6: ; %bb.0: 3040; GFX6-NEXT: v_mov_b32_e32 v1, s1 3041;
GFX6-NEXT: v_alignbit_b32 v0, s0, v0, v1 3042; GFX6-NEXT: ; return to shader part epilog 3043; 3044; GFX8-LABEL: v_fshr_i32_svs: 3045; GFX8: ; %bb.0: 3046; GFX8-NEXT: v_mov_b32_e32 v1, s1 3047; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, v1 3048; GFX8-NEXT: ; return to shader part epilog 3049; 3050; GFX9-LABEL: v_fshr_i32_svs: 3051; GFX9: ; %bb.0: 3052; GFX9-NEXT: v_mov_b32_e32 v1, s1 3053; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, v1 3054; GFX9-NEXT: ; return to shader part epilog 3055; 3056; GFX10-LABEL: v_fshr_i32_svs: 3057; GFX10: ; %bb.0: 3058; GFX10-NEXT: v_alignbit_b32 v0, s0, v0, s1 3059; GFX10-NEXT: ; return to shader part epilog 3060; 3061; GFX11-LABEL: v_fshr_i32_svs: 3062; GFX11: ; %bb.0: 3063; GFX11-NEXT: v_alignbit_b32 v0, s0, v0, s1 3064; GFX11-NEXT: ; return to shader part epilog 3065 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt) 3066 %cast.result = bitcast i32 %result to float 3067 ret float %cast.result 3068} 3069; v_fshr_i32_vss - i32 funnel shift right returning the result bitcast to float, left in v0 by the checks. NOTE(review): the name suggests lhs in a VGPR with rhs and amt in SGPRs, but all three parameters are declared inreg and the checks read s0, s1 and s2, so no VGPR-lhs combination is exercised here. Confirm whether %lhs was meant to drop inreg; if so the assertions must be regenerated with utils/update_llc_test_checks.py rather than edited by hand. 3070define amdgpu_ps float @v_fshr_i32_vss(i32 inreg %lhs, i32 inreg %rhs, i32 inreg %amt) { 3071; GFX6-LABEL: v_fshr_i32_vss: 3072; GFX6: ; %bb.0: 3073; GFX6-NEXT: v_mov_b32_e32 v0, s1 3074; GFX6-NEXT: v_mov_b32_e32 v1, s2 3075; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, v1 3076; GFX6-NEXT: ; return to shader part epilog 3077; 3078; GFX8-LABEL: v_fshr_i32_vss: 3079; GFX8: ; %bb.0: 3080; GFX8-NEXT: v_mov_b32_e32 v0, s1 3081; GFX8-NEXT: v_mov_b32_e32 v1, s2 3082; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, v1 3083; GFX8-NEXT: ; return to shader part epilog 3084; 3085; GFX9-LABEL: v_fshr_i32_vss: 3086; GFX9: ; %bb.0: 3087; GFX9-NEXT: v_mov_b32_e32 v0, s1 3088; GFX9-NEXT: v_mov_b32_e32 v1, s2 3089; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, v1 3090; GFX9-NEXT: ; return to shader part epilog 3091; 3092; GFX10-LABEL: v_fshr_i32_vss: 3093; GFX10: ; %bb.0: 3094; GFX10-NEXT: v_mov_b32_e32 v0, s2 3095; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, v0 3096; GFX10-NEXT: ; return to shader part epilog 3097; 3098; GFX11-LABEL: v_fshr_i32_vss: 3099; GFX11: ; %bb.0:
3100; GFX11-NEXT: v_mov_b32_e32 v0, s2 3101; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 3102; GFX11-NEXT: v_alignbit_b32 v0, s0, s1, v0 3103; GFX11-NEXT: ; return to shader part epilog 3104 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt) 3105 %cast.result = bitcast i32 %result to float 3106 ret float %cast.result 3107} 3108; v_fshr_v2i32 - funnel shift right on <2 x i32> with regular (non-inreg) arguments. The checks expect one v_alignbit_b32 per element, pairing lhs v0/v1 with rhs v2/v3 and amounts v4/v5. 3109define <2 x i32> @v_fshr_v2i32(<2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> %amt) { 3110; GFX6-LABEL: v_fshr_v2i32: 3111; GFX6: ; %bb.0: 3112; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3113; GFX6-NEXT: v_alignbit_b32 v0, v0, v2, v4 3114; GFX6-NEXT: v_alignbit_b32 v1, v1, v3, v5 3115; GFX6-NEXT: s_setpc_b64 s[30:31] 3116; 3117; GFX8-LABEL: v_fshr_v2i32: 3118; GFX8: ; %bb.0: 3119; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3120; GFX8-NEXT: v_alignbit_b32 v0, v0, v2, v4 3121; GFX8-NEXT: v_alignbit_b32 v1, v1, v3, v5 3122; GFX8-NEXT: s_setpc_b64 s[30:31] 3123; 3124; GFX9-LABEL: v_fshr_v2i32: 3125; GFX9: ; %bb.0: 3126; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3127; GFX9-NEXT: v_alignbit_b32 v0, v0, v2, v4 3128; GFX9-NEXT: v_alignbit_b32 v1, v1, v3, v5 3129; GFX9-NEXT: s_setpc_b64 s[30:31] 3130; 3131; GFX10-LABEL: v_fshr_v2i32: 3132; GFX10: ; %bb.0: 3133; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3134; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3135; GFX10-NEXT: v_alignbit_b32 v0, v0, v2, v4 3136; GFX10-NEXT: v_alignbit_b32 v1, v1, v3, v5 3137; GFX10-NEXT: s_setpc_b64 s[30:31] 3138; 3139; GFX11-LABEL: v_fshr_v2i32: 3140; GFX11: ; %bb.0: 3141; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3142; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3143; GFX11-NEXT: v_alignbit_b32 v0, v0, v2, v4 3144; GFX11-NEXT: v_alignbit_b32 v1, v1, v3, v5 3145; GFX11-NEXT: s_setpc_b64 s[30:31] 3146 %result = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> %amt) 3147 ret <2 x i32> %result 3148} 3149; v_fshr_v3i32 - funnel shift right on <3 x i32> with regular (non-inreg) arguments. The checks expect one v_alignbit_b32 per element, pairing lhs v0-v2 with rhs v3-v5 and amounts v6-v8. 3150define <3 x i32> @v_fshr_v3i32(<3 x i32> %lhs, <3 x i32> %rhs, <3 x i32> %amt) { 3151; GFX6-LABEL:
v_fshr_v3i32: 3152; GFX6: ; %bb.0: 3153; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3154; GFX6-NEXT: v_alignbit_b32 v0, v0, v3, v6 3155; GFX6-NEXT: v_alignbit_b32 v1, v1, v4, v7 3156; GFX6-NEXT: v_alignbit_b32 v2, v2, v5, v8 3157; GFX6-NEXT: s_setpc_b64 s[30:31] 3158; 3159; GFX8-LABEL: v_fshr_v3i32: 3160; GFX8: ; %bb.0: 3161; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3162; GFX8-NEXT: v_alignbit_b32 v0, v0, v3, v6 3163; GFX8-NEXT: v_alignbit_b32 v1, v1, v4, v7 3164; GFX8-NEXT: v_alignbit_b32 v2, v2, v5, v8 3165; GFX8-NEXT: s_setpc_b64 s[30:31] 3166; 3167; GFX9-LABEL: v_fshr_v3i32: 3168; GFX9: ; %bb.0: 3169; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3170; GFX9-NEXT: v_alignbit_b32 v0, v0, v3, v6 3171; GFX9-NEXT: v_alignbit_b32 v1, v1, v4, v7 3172; GFX9-NEXT: v_alignbit_b32 v2, v2, v5, v8 3173; GFX9-NEXT: s_setpc_b64 s[30:31] 3174; 3175; GFX10-LABEL: v_fshr_v3i32: 3176; GFX10: ; %bb.0: 3177; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3178; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3179; GFX10-NEXT: v_alignbit_b32 v0, v0, v3, v6 3180; GFX10-NEXT: v_alignbit_b32 v1, v1, v4, v7 3181; GFX10-NEXT: v_alignbit_b32 v2, v2, v5, v8 3182; GFX10-NEXT: s_setpc_b64 s[30:31] 3183; 3184; GFX11-LABEL: v_fshr_v3i32: 3185; GFX11: ; %bb.0: 3186; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3187; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3188; GFX11-NEXT: v_alignbit_b32 v0, v0, v3, v6 3189; GFX11-NEXT: v_alignbit_b32 v1, v1, v4, v7 3190; GFX11-NEXT: v_alignbit_b32 v2, v2, v5, v8 3191; GFX11-NEXT: s_setpc_b64 s[30:31] 3192 %result = call <3 x i32> @llvm.fshr.v3i32(<3 x i32> %lhs, <3 x i32> %rhs, <3 x i32> %amt) 3193 ret <3 x i32> %result 3194} 3195; v_fshr_v4i32 - funnel shift right on <4 x i32> with regular (non-inreg) arguments. The checks expect one v_alignbit_b32 per element, pairing lhs v0-v3 with rhs v4-v7 and amounts v8-v11. 3196define <4 x i32> @v_fshr_v4i32(<4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> %amt) { 3197; GFX6-LABEL: v_fshr_v4i32: 3198; GFX6: ; %bb.0: 3199; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3200; GFX6-NEXT: v_alignbit_b32 v0, v0, v4, v8 3201; GFX6-NEXT: v_alignbit_b32 v1, v1, v5, v9 3202; GFX6-NEXT:
v_alignbit_b32 v2, v2, v6, v10 3203; GFX6-NEXT: v_alignbit_b32 v3, v3, v7, v11 3204; GFX6-NEXT: s_setpc_b64 s[30:31] 3205; 3206; GFX8-LABEL: v_fshr_v4i32: 3207; GFX8: ; %bb.0: 3208; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3209; GFX8-NEXT: v_alignbit_b32 v0, v0, v4, v8 3210; GFX8-NEXT: v_alignbit_b32 v1, v1, v5, v9 3211; GFX8-NEXT: v_alignbit_b32 v2, v2, v6, v10 3212; GFX8-NEXT: v_alignbit_b32 v3, v3, v7, v11 3213; GFX8-NEXT: s_setpc_b64 s[30:31] 3214; 3215; GFX9-LABEL: v_fshr_v4i32: 3216; GFX9: ; %bb.0: 3217; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3218; GFX9-NEXT: v_alignbit_b32 v0, v0, v4, v8 3219; GFX9-NEXT: v_alignbit_b32 v1, v1, v5, v9 3220; GFX9-NEXT: v_alignbit_b32 v2, v2, v6, v10 3221; GFX9-NEXT: v_alignbit_b32 v3, v3, v7, v11 3222; GFX9-NEXT: s_setpc_b64 s[30:31] 3223; 3224; GFX10-LABEL: v_fshr_v4i32: 3225; GFX10: ; %bb.0: 3226; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3227; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3228; GFX10-NEXT: v_alignbit_b32 v0, v0, v4, v8 3229; GFX10-NEXT: v_alignbit_b32 v1, v1, v5, v9 3230; GFX10-NEXT: v_alignbit_b32 v2, v2, v6, v10 3231; GFX10-NEXT: v_alignbit_b32 v3, v3, v7, v11 3232; GFX10-NEXT: s_setpc_b64 s[30:31] 3233; 3234; GFX11-LABEL: v_fshr_v4i32: 3235; GFX11: ; %bb.0: 3236; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3237; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3238; GFX11-NEXT: v_alignbit_b32 v0, v0, v4, v8 3239; GFX11-NEXT: v_alignbit_b32 v1, v1, v5, v9 3240; GFX11-NEXT: v_alignbit_b32 v2, v2, v6, v10 3241; GFX11-NEXT: v_alignbit_b32 v3, v3, v7, v11 3242; GFX11-NEXT: s_setpc_b64 s[30:31] 3243 %result = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> %amt) 3244 ret <4 x i32> %result 3245} 3246 3247define amdgpu_ps i16 @s_fshr_i16(i16 inreg %lhs, i16 inreg %rhs, i16 inreg %amt) { 3248; GFX6-LABEL: s_fshr_i16: 3249; GFX6: ; %bb.0: 3250; GFX6-NEXT: s_and_b32 s3, s2, 15 3251; GFX6-NEXT: s_andn2_b32 s2, 15, s2 3252; GFX6-NEXT: s_lshl_b32 s0, s0, 1 3253; 
GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000 3254; GFX6-NEXT: s_lshl_b32 s0, s0, s2 3255; GFX6-NEXT: s_bfe_u32 s2, s3, 0x100000 3256; GFX6-NEXT: s_and_b32 s1, s1, 0xffff 3257; GFX6-NEXT: s_lshr_b32 s1, s1, s2 3258; GFX6-NEXT: s_or_b32 s0, s0, s1 3259; GFX6-NEXT: ; return to shader part epilog 3260; 3261; GFX8-LABEL: s_fshr_i16: 3262; GFX8: ; %bb.0: 3263; GFX8-NEXT: s_and_b32 s3, s2, 15 3264; GFX8-NEXT: s_andn2_b32 s2, 15, s2 3265; GFX8-NEXT: s_bfe_u32 s4, 1, 0x100000 3266; GFX8-NEXT: s_lshl_b32 s0, s0, s4 3267; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 3268; GFX8-NEXT: s_lshl_b32 s0, s0, s2 3269; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 3270; GFX8-NEXT: s_bfe_u32 s2, s3, 0x100000 3271; GFX8-NEXT: s_lshr_b32 s1, s1, s2 3272; GFX8-NEXT: s_or_b32 s0, s0, s1 3273; GFX8-NEXT: ; return to shader part epilog 3274; 3275; GFX9-LABEL: s_fshr_i16: 3276; GFX9: ; %bb.0: 3277; GFX9-NEXT: s_and_b32 s3, s2, 15 3278; GFX9-NEXT: s_andn2_b32 s2, 15, s2 3279; GFX9-NEXT: s_bfe_u32 s4, 1, 0x100000 3280; GFX9-NEXT: s_lshl_b32 s0, s0, s4 3281; GFX9-NEXT: s_bfe_u32 s2, s2, 0x100000 3282; GFX9-NEXT: s_lshl_b32 s0, s0, s2 3283; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 3284; GFX9-NEXT: s_bfe_u32 s2, s3, 0x100000 3285; GFX9-NEXT: s_lshr_b32 s1, s1, s2 3286; GFX9-NEXT: s_or_b32 s0, s0, s1 3287; GFX9-NEXT: ; return to shader part epilog 3288; 3289; GFX10-LABEL: s_fshr_i16: 3290; GFX10: ; %bb.0: 3291; GFX10-NEXT: s_and_b32 s3, s2, 15 3292; GFX10-NEXT: s_bfe_u32 s4, 1, 0x100000 3293; GFX10-NEXT: s_andn2_b32 s2, 15, s2 3294; GFX10-NEXT: s_lshl_b32 s0, s0, s4 3295; GFX10-NEXT: s_bfe_u32 s2, s2, 0x100000 3296; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 3297; GFX10-NEXT: s_bfe_u32 s3, s3, 0x100000 3298; GFX10-NEXT: s_lshl_b32 s0, s0, s2 3299; GFX10-NEXT: s_lshr_b32 s1, s1, s3 3300; GFX10-NEXT: s_or_b32 s0, s0, s1 3301; GFX10-NEXT: ; return to shader part epilog 3302; 3303; GFX11-LABEL: s_fshr_i16: 3304; GFX11: ; %bb.0: 3305; GFX11-NEXT: s_and_b32 s3, s2, 15 3306; GFX11-NEXT: s_bfe_u32 s4, 1, 0x100000 3307; GFX11-NEXT: 
s_and_not1_b32 s2, 15, s2 3308; GFX11-NEXT: s_lshl_b32 s0, s0, s4 3309; GFX11-NEXT: s_bfe_u32 s2, s2, 0x100000 3310; GFX11-NEXT: s_bfe_u32 s1, s1, 0x100000 3311; GFX11-NEXT: s_bfe_u32 s3, s3, 0x100000 3312; GFX11-NEXT: s_lshl_b32 s0, s0, s2 3313; GFX11-NEXT: s_lshr_b32 s1, s1, s3 3314; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 3315; GFX11-NEXT: s_or_b32 s0, s0, s1 3316; GFX11-NEXT: ; return to shader part epilog 3317 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt) 3318 ret i16 %result 3319} 3320 3321define amdgpu_ps i16 @s_fshr_i16_4(i16 inreg %lhs, i16 inreg %rhs) { 3322; GFX6-LABEL: s_fshr_i16_4: 3323; GFX6: ; %bb.0: 3324; GFX6-NEXT: s_lshl_b32 s0, s0, 12 3325; GFX6-NEXT: s_bfe_u32 s1, s1, 0xc0004 3326; GFX6-NEXT: s_or_b32 s0, s0, s1 3327; GFX6-NEXT: ; return to shader part epilog 3328; 3329; GFX8-LABEL: s_fshr_i16_4: 3330; GFX8: ; %bb.0: 3331; GFX8-NEXT: s_bfe_u32 s2, 12, 0x100000 3332; GFX8-NEXT: s_lshl_b32 s0, s0, s2 3333; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 3334; GFX8-NEXT: s_bfe_u32 s2, 4, 0x100000 3335; GFX8-NEXT: s_lshr_b32 s1, s1, s2 3336; GFX8-NEXT: s_or_b32 s0, s0, s1 3337; GFX8-NEXT: ; return to shader part epilog 3338; 3339; GFX9-LABEL: s_fshr_i16_4: 3340; GFX9: ; %bb.0: 3341; GFX9-NEXT: s_bfe_u32 s2, 12, 0x100000 3342; GFX9-NEXT: s_lshl_b32 s0, s0, s2 3343; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 3344; GFX9-NEXT: s_bfe_u32 s2, 4, 0x100000 3345; GFX9-NEXT: s_lshr_b32 s1, s1, s2 3346; GFX9-NEXT: s_or_b32 s0, s0, s1 3347; GFX9-NEXT: ; return to shader part epilog 3348; 3349; GFX10-LABEL: s_fshr_i16_4: 3350; GFX10: ; %bb.0: 3351; GFX10-NEXT: s_bfe_u32 s2, 12, 0x100000 3352; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 3353; GFX10-NEXT: s_bfe_u32 s3, 4, 0x100000 3354; GFX10-NEXT: s_lshl_b32 s0, s0, s2 3355; GFX10-NEXT: s_lshr_b32 s1, s1, s3 3356; GFX10-NEXT: s_or_b32 s0, s0, s1 3357; GFX10-NEXT: ; return to shader part epilog 3358; 3359; GFX11-LABEL: s_fshr_i16_4: 3360; GFX11: ; %bb.0: 3361; GFX11-NEXT: s_bfe_u32 s2, 12, 0x100000 3362; 
GFX11-NEXT: s_bfe_u32 s1, s1, 0x100000 3363; GFX11-NEXT: s_bfe_u32 s3, 4, 0x100000 3364; GFX11-NEXT: s_lshl_b32 s0, s0, s2 3365; GFX11-NEXT: s_lshr_b32 s1, s1, s3 3366; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 3367; GFX11-NEXT: s_or_b32 s0, s0, s1 3368; GFX11-NEXT: ; return to shader part epilog 3369 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 4) 3370 ret i16 %result 3371} 3372 3373define amdgpu_ps i16 @s_fshr_i16_5(i16 inreg %lhs, i16 inreg %rhs) { 3374; GFX6-LABEL: s_fshr_i16_5: 3375; GFX6: ; %bb.0: 3376; GFX6-NEXT: s_lshl_b32 s0, s0, 11 3377; GFX6-NEXT: s_bfe_u32 s1, s1, 0xb0005 3378; GFX6-NEXT: s_or_b32 s0, s0, s1 3379; GFX6-NEXT: ; return to shader part epilog 3380; 3381; GFX8-LABEL: s_fshr_i16_5: 3382; GFX8: ; %bb.0: 3383; GFX8-NEXT: s_bfe_u32 s2, 11, 0x100000 3384; GFX8-NEXT: s_lshl_b32 s0, s0, s2 3385; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 3386; GFX8-NEXT: s_bfe_u32 s2, 5, 0x100000 3387; GFX8-NEXT: s_lshr_b32 s1, s1, s2 3388; GFX8-NEXT: s_or_b32 s0, s0, s1 3389; GFX8-NEXT: ; return to shader part epilog 3390; 3391; GFX9-LABEL: s_fshr_i16_5: 3392; GFX9: ; %bb.0: 3393; GFX9-NEXT: s_bfe_u32 s2, 11, 0x100000 3394; GFX9-NEXT: s_lshl_b32 s0, s0, s2 3395; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 3396; GFX9-NEXT: s_bfe_u32 s2, 5, 0x100000 3397; GFX9-NEXT: s_lshr_b32 s1, s1, s2 3398; GFX9-NEXT: s_or_b32 s0, s0, s1 3399; GFX9-NEXT: ; return to shader part epilog 3400; 3401; GFX10-LABEL: s_fshr_i16_5: 3402; GFX10: ; %bb.0: 3403; GFX10-NEXT: s_bfe_u32 s2, 11, 0x100000 3404; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 3405; GFX10-NEXT: s_bfe_u32 s3, 5, 0x100000 3406; GFX10-NEXT: s_lshl_b32 s0, s0, s2 3407; GFX10-NEXT: s_lshr_b32 s1, s1, s3 3408; GFX10-NEXT: s_or_b32 s0, s0, s1 3409; GFX10-NEXT: ; return to shader part epilog 3410; 3411; GFX11-LABEL: s_fshr_i16_5: 3412; GFX11: ; %bb.0: 3413; GFX11-NEXT: s_bfe_u32 s2, 11, 0x100000 3414; GFX11-NEXT: s_bfe_u32 s1, s1, 0x100000 3415; GFX11-NEXT: s_bfe_u32 s3, 5, 0x100000 3416; GFX11-NEXT: s_lshl_b32 s0, s0, s2 
3417; GFX11-NEXT: s_lshr_b32 s1, s1, s3 3418; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 3419; GFX11-NEXT: s_or_b32 s0, s0, s1 3420; GFX11-NEXT: ; return to shader part epilog 3421 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 5) 3422 ret i16 %result 3423} 3424 3425define i16 @v_fshr_i16(i16 %lhs, i16 %rhs, i16 %amt) { 3426; GFX6-LABEL: v_fshr_i16: 3427; GFX6: ; %bb.0: 3428; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3429; GFX6-NEXT: v_and_b32_e32 v3, 15, v2 3430; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2 3431; GFX6-NEXT: v_and_b32_e32 v2, 15, v2 3432; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 3433; GFX6-NEXT: v_bfe_u32 v2, v2, 0, 16 3434; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0 3435; GFX6-NEXT: v_bfe_u32 v2, v3, 0, 16 3436; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 3437; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1 3438; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 3439; GFX6-NEXT: s_setpc_b64 s[30:31] 3440; 3441; GFX8-LABEL: v_fshr_i16: 3442; GFX8: ; %bb.0: 3443; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3444; GFX8-NEXT: v_and_b32_e32 v3, 15, v2 3445; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2 3446; GFX8-NEXT: v_and_b32_e32 v2, 15, v2 3447; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0 3448; GFX8-NEXT: v_lshlrev_b16_e32 v0, v2, v0 3449; GFX8-NEXT: v_lshrrev_b16_e32 v1, v3, v1 3450; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3451; GFX8-NEXT: s_setpc_b64 s[30:31] 3452; 3453; GFX9-LABEL: v_fshr_i16: 3454; GFX9: ; %bb.0: 3455; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3456; GFX9-NEXT: v_and_b32_e32 v3, 15, v2 3457; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2 3458; GFX9-NEXT: v_and_b32_e32 v2, 15, v2 3459; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0 3460; GFX9-NEXT: v_lshlrev_b16_e32 v0, v2, v0 3461; GFX9-NEXT: v_lshrrev_b16_e32 v1, v3, v1 3462; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 3463; GFX9-NEXT: s_setpc_b64 s[30:31] 3464; 3465; GFX10-LABEL: v_fshr_i16: 3466; GFX10: ; %bb.0: 3467; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3468; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3469; 
GFX10-NEXT: v_xor_b32_e32 v3, -1, v2 3470; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 3471; GFX10-NEXT: v_and_b32_e32 v2, 15, v2 3472; GFX10-NEXT: v_and_b32_e32 v3, 15, v3 3473; GFX10-NEXT: v_lshrrev_b16 v1, v2, v1 3474; GFX10-NEXT: v_lshlrev_b16 v0, v3, v0 3475; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 3476; GFX10-NEXT: s_setpc_b64 s[30:31] 3477; 3478; GFX11-LABEL: v_fshr_i16: 3479; GFX11: ; %bb.0: 3480; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3481; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3482; GFX11-NEXT: v_xor_b32_e32 v3, -1, v2 3483; GFX11-NEXT: v_lshlrev_b16 v0, 1, v0 3484; GFX11-NEXT: v_and_b32_e32 v2, 15, v2 3485; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 3486; GFX11-NEXT: v_and_b32_e32 v3, 15, v3 3487; GFX11-NEXT: v_lshrrev_b16 v1, v2, v1 3488; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 3489; GFX11-NEXT: v_lshlrev_b16 v0, v3, v0 3490; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 3491; GFX11-NEXT: s_setpc_b64 s[30:31] 3492 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt) 3493 ret i16 %result 3494} 3495 3496define i16 @v_fshr_i16_4(i16 %lhs, i16 %rhs) { 3497; GFX6-LABEL: v_fshr_i16_4: 3498; GFX6: ; %bb.0: 3499; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3500; GFX6-NEXT: v_lshlrev_b32_e32 v0, 12, v0 3501; GFX6-NEXT: v_bfe_u32 v1, v1, 4, 12 3502; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 3503; GFX6-NEXT: s_setpc_b64 s[30:31] 3504; 3505; GFX8-LABEL: v_fshr_i16_4: 3506; GFX8: ; %bb.0: 3507; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3508; GFX8-NEXT: v_lshlrev_b16_e32 v0, 12, v0 3509; GFX8-NEXT: v_lshrrev_b16_e32 v1, 4, v1 3510; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3511; GFX8-NEXT: s_setpc_b64 s[30:31] 3512; 3513; GFX9-LABEL: v_fshr_i16_4: 3514; GFX9: ; %bb.0: 3515; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3516; GFX9-NEXT: v_lshlrev_b16_e32 v0, 12, v0 3517; GFX9-NEXT: v_lshrrev_b16_e32 v1, 4, v1 3518; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 3519; GFX9-NEXT: s_setpc_b64 
s[30:31] 3520; 3521; GFX10-LABEL: v_fshr_i16_4: 3522; GFX10: ; %bb.0: 3523; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3524; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3525; GFX10-NEXT: v_lshlrev_b16 v0, 12, v0 3526; GFX10-NEXT: v_lshrrev_b16 v1, 4, v1 3527; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 3528; GFX10-NEXT: s_setpc_b64 s[30:31] 3529; 3530; GFX11-LABEL: v_fshr_i16_4: 3531; GFX11: ; %bb.0: 3532; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3533; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3534; GFX11-NEXT: v_lshlrev_b16 v0, 12, v0 3535; GFX11-NEXT: v_lshrrev_b16 v1, 4, v1 3536; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 3537; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 3538; GFX11-NEXT: s_setpc_b64 s[30:31] 3539 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 4) 3540 ret i16 %result 3541} 3542 3543define i16 @v_fshr_i16_5(i16 %lhs, i16 %rhs) { 3544; GFX6-LABEL: v_fshr_i16_5: 3545; GFX6: ; %bb.0: 3546; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3547; GFX6-NEXT: v_lshlrev_b32_e32 v0, 11, v0 3548; GFX6-NEXT: v_bfe_u32 v1, v1, 5, 11 3549; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 3550; GFX6-NEXT: s_setpc_b64 s[30:31] 3551; 3552; GFX8-LABEL: v_fshr_i16_5: 3553; GFX8: ; %bb.0: 3554; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3555; GFX8-NEXT: v_lshlrev_b16_e32 v0, 11, v0 3556; GFX8-NEXT: v_lshrrev_b16_e32 v1, 5, v1 3557; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3558; GFX8-NEXT: s_setpc_b64 s[30:31] 3559; 3560; GFX9-LABEL: v_fshr_i16_5: 3561; GFX9: ; %bb.0: 3562; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3563; GFX9-NEXT: v_lshlrev_b16_e32 v0, 11, v0 3564; GFX9-NEXT: v_lshrrev_b16_e32 v1, 5, v1 3565; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 3566; GFX9-NEXT: s_setpc_b64 s[30:31] 3567; 3568; GFX10-LABEL: v_fshr_i16_5: 3569; GFX10: ; %bb.0: 3570; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3571; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3572; GFX10-NEXT: v_lshlrev_b16 v0, 11, v0 3573; GFX10-NEXT: v_lshrrev_b16 v1, 5, v1 3574; GFX10-NEXT: v_or_b32_e32 v0, 
v0, v1 3575; GFX10-NEXT: s_setpc_b64 s[30:31] 3576; 3577; GFX11-LABEL: v_fshr_i16_5: 3578; GFX11: ; %bb.0: 3579; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3580; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 3581; GFX11-NEXT: v_lshlrev_b16 v0, 11, v0 3582; GFX11-NEXT: v_lshrrev_b16 v1, 5, v1 3583; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 3584; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 3585; GFX11-NEXT: s_setpc_b64 s[30:31] 3586 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 5) 3587 ret i16 %result 3588} 3589 3590define amdgpu_ps half @v_fshr_i16_ssv(i16 inreg %lhs, i16 inreg %rhs, i16 %amt) { 3591; GFX6-LABEL: v_fshr_i16_ssv: 3592; GFX6: ; %bb.0: 3593; GFX6-NEXT: v_and_b32_e32 v1, 15, v0 3594; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0 3595; GFX6-NEXT: v_and_b32_e32 v0, 15, v0 3596; GFX6-NEXT: s_lshl_b32 s0, s0, 1 3597; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16 3598; GFX6-NEXT: v_lshl_b32_e32 v0, s0, v0 3599; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16 3600; GFX6-NEXT: s_and_b32 s0, s1, 0xffff 3601; GFX6-NEXT: v_lshr_b32_e32 v1, s0, v1 3602; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 3603; GFX6-NEXT: ; return to shader part epilog 3604; 3605; GFX8-LABEL: v_fshr_i16_ssv: 3606; GFX8: ; %bb.0: 3607; GFX8-NEXT: v_and_b32_e32 v1, 15, v0 3608; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0 3609; GFX8-NEXT: s_bfe_u32 s2, 1, 0x100000 3610; GFX8-NEXT: v_and_b32_e32 v0, 15, v0 3611; GFX8-NEXT: s_lshl_b32 s0, s0, s2 3612; GFX8-NEXT: v_lshlrev_b16_e64 v0, v0, s0 3613; GFX8-NEXT: v_lshrrev_b16_e64 v1, v1, s1 3614; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3615; GFX8-NEXT: ; return to shader part epilog 3616; 3617; GFX9-LABEL: v_fshr_i16_ssv: 3618; GFX9: ; %bb.0: 3619; GFX9-NEXT: v_and_b32_e32 v1, 15, v0 3620; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0 3621; GFX9-NEXT: s_bfe_u32 s2, 1, 0x100000 3622; GFX9-NEXT: v_and_b32_e32 v0, 15, v0 3623; GFX9-NEXT: s_lshl_b32 s0, s0, s2 3624; GFX9-NEXT: v_lshlrev_b16_e64 v0, v0, s0 3625; GFX9-NEXT: v_lshrrev_b16_e64 v1, v1, s1 3626; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 3627; GFX9-NEXT: ; 
return to shader part epilog 3628; 3629; GFX10-LABEL: v_fshr_i16_ssv: 3630; GFX10: ; %bb.0: 3631; GFX10-NEXT: v_xor_b32_e32 v1, -1, v0 3632; GFX10-NEXT: v_and_b32_e32 v0, 15, v0 3633; GFX10-NEXT: s_bfe_u32 s2, 1, 0x100000 3634; GFX10-NEXT: s_lshl_b32 s0, s0, s2 3635; GFX10-NEXT: v_and_b32_e32 v1, 15, v1 3636; GFX10-NEXT: v_lshrrev_b16 v0, v0, s1 3637; GFX10-NEXT: v_lshlrev_b16 v1, v1, s0 3638; GFX10-NEXT: v_or_b32_e32 v0, v1, v0 3639; GFX10-NEXT: ; return to shader part epilog 3640; 3641; GFX11-LABEL: v_fshr_i16_ssv: 3642; GFX11: ; %bb.0: 3643; GFX11-NEXT: v_xor_b32_e32 v1, -1, v0 3644; GFX11-NEXT: v_and_b32_e32 v0, 15, v0 3645; GFX11-NEXT: s_bfe_u32 s2, 1, 0x100000 3646; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2) 3647; GFX11-NEXT: s_lshl_b32 s0, s0, s2 3648; GFX11-NEXT: v_and_b32_e32 v1, 15, v1 3649; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 3650; GFX11-NEXT: v_lshrrev_b16 v0, v0, s1 3651; GFX11-NEXT: v_lshlrev_b16 v1, v1, s0 3652; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 3653; GFX11-NEXT: v_or_b32_e32 v0, v1, v0 3654; GFX11-NEXT: ; return to shader part epilog 3655 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt) 3656 %cast.result = bitcast i16 %result to half 3657 ret half %cast.result 3658} 3659 3660define amdgpu_ps half @v_fshr_i16_svs(i16 inreg %lhs, i16 %rhs, i16 inreg %amt) { 3661; GFX6-LABEL: v_fshr_i16_svs: 3662; GFX6: ; %bb.0: 3663; GFX6-NEXT: s_and_b32 s2, s1, 15 3664; GFX6-NEXT: s_andn2_b32 s1, 15, s1 3665; GFX6-NEXT: s_lshl_b32 s0, s0, 1 3666; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000 3667; GFX6-NEXT: s_lshl_b32 s0, s0, s1 3668; GFX6-NEXT: s_bfe_u32 s1, s2, 0x100000 3669; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 3670; GFX6-NEXT: v_lshrrev_b32_e32 v0, s1, v0 3671; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 3672; GFX6-NEXT: ; return to shader part epilog 3673; 3674; GFX8-LABEL: v_fshr_i16_svs: 3675; GFX8: ; %bb.0: 3676; GFX8-NEXT: s_and_b32 s2, s1, 15 3677; 
GFX8-NEXT: s_andn2_b32 s1, 15, s1 3678; GFX8-NEXT: s_bfe_u32 s3, 1, 0x100000 3679; GFX8-NEXT: s_lshl_b32 s0, s0, s3 3680; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 3681; GFX8-NEXT: s_lshl_b32 s0, s0, s1 3682; GFX8-NEXT: v_lshrrev_b16_e32 v0, s2, v0 3683; GFX8-NEXT: v_or_b32_e32 v0, s0, v0 3684; GFX8-NEXT: ; return to shader part epilog 3685; 3686; GFX9-LABEL: v_fshr_i16_svs: 3687; GFX9: ; %bb.0: 3688; GFX9-NEXT: s_and_b32 s2, s1, 15 3689; GFX9-NEXT: s_andn2_b32 s1, 15, s1 3690; GFX9-NEXT: s_bfe_u32 s3, 1, 0x100000 3691; GFX9-NEXT: s_lshl_b32 s0, s0, s3 3692; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 3693; GFX9-NEXT: s_lshl_b32 s0, s0, s1 3694; GFX9-NEXT: v_lshrrev_b16_e32 v0, s2, v0 3695; GFX9-NEXT: v_or_b32_e32 v0, s0, v0 3696; GFX9-NEXT: ; return to shader part epilog 3697; 3698; GFX10-LABEL: v_fshr_i16_svs: 3699; GFX10: ; %bb.0: 3700; GFX10-NEXT: s_and_b32 s2, s1, 15 3701; GFX10-NEXT: s_bfe_u32 s3, 1, 0x100000 3702; GFX10-NEXT: s_andn2_b32 s1, 15, s1 3703; GFX10-NEXT: v_lshrrev_b16 v0, s2, v0 3704; GFX10-NEXT: s_lshl_b32 s0, s0, s3 3705; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 3706; GFX10-NEXT: s_lshl_b32 s0, s0, s1 3707; GFX10-NEXT: v_or_b32_e32 v0, s0, v0 3708; GFX10-NEXT: ; return to shader part epilog 3709; 3710; GFX11-LABEL: v_fshr_i16_svs: 3711; GFX11: ; %bb.0: 3712; GFX11-NEXT: s_and_b32 s2, s1, 15 3713; GFX11-NEXT: s_bfe_u32 s3, 1, 0x100000 3714; GFX11-NEXT: s_and_not1_b32 s1, 15, s1 3715; GFX11-NEXT: v_lshrrev_b16 v0, s2, v0 3716; GFX11-NEXT: s_lshl_b32 s0, s0, s3 3717; GFX11-NEXT: s_bfe_u32 s1, s1, 0x100000 3718; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 3719; GFX11-NEXT: s_lshl_b32 s0, s0, s1 3720; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 3721; GFX11-NEXT: v_or_b32_e32 v0, s0, v0 3722; GFX11-NEXT: ; return to shader part epilog 3723 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt) 3724 %cast.result = bitcast i16 %result to half 3725 ret half %cast.result 3726} 3727 3728define amdgpu_ps half @v_fshr_i16_vss(i16 
%lhs, i16 inreg %rhs, i16 inreg %amt) { 3729; GFX6-LABEL: v_fshr_i16_vss: 3730; GFX6: ; %bb.0: 3731; GFX6-NEXT: s_and_b32 s2, s1, 15 3732; GFX6-NEXT: s_andn2_b32 s1, 15, s1 3733; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 3734; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000 3735; GFX6-NEXT: v_lshlrev_b32_e32 v0, s1, v0 3736; GFX6-NEXT: s_bfe_u32 s1, s2, 0x100000 3737; GFX6-NEXT: s_and_b32 s0, s0, 0xffff 3738; GFX6-NEXT: s_lshr_b32 s0, s0, s1 3739; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 3740; GFX6-NEXT: ; return to shader part epilog 3741; 3742; GFX8-LABEL: v_fshr_i16_vss: 3743; GFX8: ; %bb.0: 3744; GFX8-NEXT: s_and_b32 s2, s1, 15 3745; GFX8-NEXT: s_andn2_b32 s1, 15, s1 3746; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0 3747; GFX8-NEXT: v_lshlrev_b16_e32 v0, s1, v0 3748; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000 3749; GFX8-NEXT: s_bfe_u32 s1, s2, 0x100000 3750; GFX8-NEXT: s_lshr_b32 s0, s0, s1 3751; GFX8-NEXT: v_or_b32_e32 v0, s0, v0 3752; GFX8-NEXT: ; return to shader part epilog 3753; 3754; GFX9-LABEL: v_fshr_i16_vss: 3755; GFX9: ; %bb.0: 3756; GFX9-NEXT: s_and_b32 s2, s1, 15 3757; GFX9-NEXT: s_andn2_b32 s1, 15, s1 3758; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0 3759; GFX9-NEXT: v_lshlrev_b16_e32 v0, s1, v0 3760; GFX9-NEXT: s_bfe_u32 s0, s0, 0x100000 3761; GFX9-NEXT: s_bfe_u32 s1, s2, 0x100000 3762; GFX9-NEXT: s_lshr_b32 s0, s0, s1 3763; GFX9-NEXT: v_or_b32_e32 v0, s0, v0 3764; GFX9-NEXT: ; return to shader part epilog 3765; 3766; GFX10-LABEL: v_fshr_i16_vss: 3767; GFX10: ; %bb.0: 3768; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 3769; GFX10-NEXT: s_andn2_b32 s2, 15, s1 3770; GFX10-NEXT: s_and_b32 s1, s1, 15 3771; GFX10-NEXT: s_bfe_u32 s0, s0, 0x100000 3772; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 3773; GFX10-NEXT: v_lshlrev_b16 v0, s2, v0 3774; GFX10-NEXT: s_lshr_b32 s0, s0, s1 3775; GFX10-NEXT: v_or_b32_e32 v0, s0, v0 3776; GFX10-NEXT: ; return to shader part epilog 3777; 3778; GFX11-LABEL: v_fshr_i16_vss: 3779; GFX11: ; %bb.0: 3780; GFX11-NEXT: v_lshlrev_b16 v0, 1, v0 3781; GFX11-NEXT: 
s_and_not1_b32 s2, 15, s1 3782; GFX11-NEXT: s_and_b32 s1, s1, 15 3783; GFX11-NEXT: s_bfe_u32 s0, s0, 0x100000 3784; GFX11-NEXT: s_bfe_u32 s1, s1, 0x100000 3785; GFX11-NEXT: v_lshlrev_b16 v0, s2, v0 3786; GFX11-NEXT: s_lshr_b32 s0, s0, s1 3787; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 3788; GFX11-NEXT: v_or_b32_e32 v0, s0, v0 3789; GFX11-NEXT: ; return to shader part epilog 3790 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt) 3791 %cast.result = bitcast i16 %result to half 3792 ret half %cast.result 3793} 3794 3795define amdgpu_ps i32 @s_fshr_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <2 x i16> inreg %amt) { 3796; GFX6-LABEL: s_fshr_v2i16: 3797; GFX6: ; %bb.0: 3798; GFX6-NEXT: s_lshl_b32 s5, s5, 16 3799; GFX6-NEXT: s_and_b32 s4, s4, 0xffff 3800; GFX6-NEXT: s_or_b32 s4, s5, s4 3801; GFX6-NEXT: s_bfe_u32 s5, 1, 0x100000 3802; GFX6-NEXT: s_lshl_b32 s0, s0, s5 3803; GFX6-NEXT: s_bfe_u32 s6, s2, 0xf0001 3804; GFX6-NEXT: s_bfe_u32 s7, 14, 0x100000 3805; GFX6-NEXT: s_lshl_b32 s1, s1, s5 3806; GFX6-NEXT: s_bfe_u32 s5, s3, 0xf0001 3807; GFX6-NEXT: s_lshr_b32 s6, s6, s7 3808; GFX6-NEXT: s_lshr_b32 s5, s5, s7 3809; GFX6-NEXT: s_xor_b32 s4, s4, -1 3810; GFX6-NEXT: s_or_b32 s0, s0, s6 3811; GFX6-NEXT: s_or_b32 s1, s1, s5 3812; GFX6-NEXT: s_lshl_b32 s2, s2, 1 3813; GFX6-NEXT: s_lshr_b32 s5, s4, 16 3814; GFX6-NEXT: s_and_b32 s6, s4, 15 3815; GFX6-NEXT: s_andn2_b32 s4, 15, s4 3816; GFX6-NEXT: s_bfe_u32 s6, s6, 0x100000 3817; GFX6-NEXT: s_bfe_u32 s2, s2, 0xf0001 3818; GFX6-NEXT: s_bfe_u32 s4, s4, 0x100000 3819; GFX6-NEXT: s_lshl_b32 s0, s0, s6 3820; GFX6-NEXT: s_lshr_b32 s2, s2, s4 3821; GFX6-NEXT: s_or_b32 s0, s0, s2 3822; GFX6-NEXT: s_and_b32 s2, s5, 15 3823; GFX6-NEXT: s_lshl_b32 s3, s3, 1 3824; GFX6-NEXT: s_andn2_b32 s4, 15, s5 3825; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000 3826; GFX6-NEXT: s_lshl_b32 s1, s1, s2 3827; GFX6-NEXT: s_bfe_u32 s2, s3, 0xf0001 3828; GFX6-NEXT: s_bfe_u32 s3, s4, 0x100000 3829; GFX6-NEXT: s_lshr_b32 s2, s2, 
s3 3830; GFX6-NEXT: s_or_b32 s1, s1, s2 3831; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000 3832; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000 3833; GFX6-NEXT: s_lshl_b32 s1, s1, 16 3834; GFX6-NEXT: s_or_b32 s0, s0, s1 3835; GFX6-NEXT: ; return to shader part epilog 3836; 3837; GFX8-LABEL: s_fshr_v2i16: 3838; GFX8: ; %bb.0: 3839; GFX8-NEXT: s_bfe_u32 s5, 1, 0x100000 3840; GFX8-NEXT: s_bfe_u32 s6, s1, 0x100000 3841; GFX8-NEXT: s_bfe_u32 s7, 15, 0x100000 3842; GFX8-NEXT: s_lshr_b32 s3, s0, 16 3843; GFX8-NEXT: s_lshr_b32 s4, s1, 16 3844; GFX8-NEXT: s_lshl_b32 s0, s0, s5 3845; GFX8-NEXT: s_lshr_b32 s6, s6, s7 3846; GFX8-NEXT: s_or_b32 s0, s0, s6 3847; GFX8-NEXT: s_lshl_b32 s3, s3, s5 3848; GFX8-NEXT: s_lshr_b32 s6, s4, s7 3849; GFX8-NEXT: s_lshl_b32 s1, s1, s5 3850; GFX8-NEXT: s_xor_b32 s2, s2, -1 3851; GFX8-NEXT: s_or_b32 s3, s3, s6 3852; GFX8-NEXT: s_lshr_b32 s6, s2, 16 3853; GFX8-NEXT: s_and_b32 s7, s2, 15 3854; GFX8-NEXT: s_andn2_b32 s2, 15, s2 3855; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 3856; GFX8-NEXT: s_bfe_u32 s7, s7, 0x100000 3857; GFX8-NEXT: s_lshr_b32 s1, s1, s5 3858; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 3859; GFX8-NEXT: s_lshl_b32 s0, s0, s7 3860; GFX8-NEXT: s_lshr_b32 s1, s1, s2 3861; GFX8-NEXT: s_or_b32 s0, s0, s1 3862; GFX8-NEXT: s_and_b32 s1, s6, 15 3863; GFX8-NEXT: s_lshl_b32 s4, s4, s5 3864; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 3865; GFX8-NEXT: s_andn2_b32 s2, 15, s6 3866; GFX8-NEXT: s_lshl_b32 s1, s3, s1 3867; GFX8-NEXT: s_bfe_u32 s3, s4, 0x100000 3868; GFX8-NEXT: s_lshr_b32 s3, s3, s5 3869; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 3870; GFX8-NEXT: s_lshr_b32 s2, s3, s2 3871; GFX8-NEXT: s_or_b32 s1, s1, s2 3872; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 3873; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000 3874; GFX8-NEXT: s_lshl_b32 s1, s1, 16 3875; GFX8-NEXT: s_or_b32 s0, s0, s1 3876; GFX8-NEXT: ; return to shader part epilog 3877; 3878; GFX9-LABEL: s_fshr_v2i16: 3879; GFX9: ; %bb.0: 3880; GFX9-NEXT: s_lshr_b32 s4, s0, 16 3881; GFX9-NEXT: s_lshl_b32 s0, s0, 0x10001 3882; 
GFX9-NEXT: s_lshl_b32 s4, s4, 1 3883; GFX9-NEXT: s_and_b32 s3, s2, 0xf000f 3884; GFX9-NEXT: s_andn2_b32 s2, 0xf000f, s2 3885; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s4 3886; GFX9-NEXT: s_lshr_b32 s4, s0, 16 3887; GFX9-NEXT: s_lshr_b32 s5, s2, 16 3888; GFX9-NEXT: s_lshl_b32 s0, s0, s2 3889; GFX9-NEXT: s_lshl_b32 s2, s4, s5 3890; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2 3891; GFX9-NEXT: s_lshr_b32 s2, s1, 16 3892; GFX9-NEXT: s_and_b32 s1, s1, 0xffff 3893; GFX9-NEXT: s_lshr_b32 s4, s3, 16 3894; GFX9-NEXT: s_lshr_b32 s1, s1, s3 3895; GFX9-NEXT: s_lshr_b32 s2, s2, s4 3896; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s2 3897; GFX9-NEXT: s_or_b32 s0, s0, s1 3898; GFX9-NEXT: ; return to shader part epilog 3899; 3900; GFX10-LABEL: s_fshr_v2i16: 3901; GFX10: ; %bb.0: 3902; GFX10-NEXT: s_lshr_b32 s3, s0, 16 3903; GFX10-NEXT: s_lshl_b32 s0, s0, 0x10001 3904; GFX10-NEXT: s_lshl_b32 s3, s3, 1 3905; GFX10-NEXT: s_and_b32 s4, s2, 0xf000f 3906; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s3 3907; GFX10-NEXT: s_andn2_b32 s2, 0xf000f, s2 3908; GFX10-NEXT: s_lshr_b32 s3, s0, 16 3909; GFX10-NEXT: s_lshr_b32 s5, s2, 16 3910; GFX10-NEXT: s_lshl_b32 s0, s0, s2 3911; GFX10-NEXT: s_lshl_b32 s2, s3, s5 3912; GFX10-NEXT: s_lshr_b32 s3, s1, 16 3913; GFX10-NEXT: s_and_b32 s1, s1, 0xffff 3914; GFX10-NEXT: s_lshr_b32 s5, s4, 16 3915; GFX10-NEXT: s_lshr_b32 s1, s1, s4 3916; GFX10-NEXT: s_lshr_b32 s3, s3, s5 3917; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s2 3918; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s3 3919; GFX10-NEXT: s_or_b32 s0, s0, s1 3920; GFX10-NEXT: ; return to shader part epilog 3921; 3922; GFX11-LABEL: s_fshr_v2i16: 3923; GFX11: ; %bb.0: 3924; GFX11-NEXT: s_lshr_b32 s3, s0, 16 3925; GFX11-NEXT: s_lshl_b32 s0, s0, 0x10001 3926; GFX11-NEXT: s_lshl_b32 s3, s3, 1 3927; GFX11-NEXT: s_and_b32 s4, s2, 0xf000f 3928; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s3 3929; GFX11-NEXT: s_and_not1_b32 s2, 0xf000f, s2 3930; GFX11-NEXT: s_lshr_b32 s3, s0, 16 3931; GFX11-NEXT: s_lshr_b32 s5, s2, 16 3932; GFX11-NEXT: s_lshl_b32 
s0, s0, s2 3933; GFX11-NEXT: s_lshl_b32 s2, s3, s5 3934; GFX11-NEXT: s_lshr_b32 s3, s1, 16 3935; GFX11-NEXT: s_and_b32 s1, s1, 0xffff 3936; GFX11-NEXT: s_lshr_b32 s5, s4, 16 3937; GFX11-NEXT: s_lshr_b32 s1, s1, s4 3938; GFX11-NEXT: s_lshr_b32 s3, s3, s5 3939; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s2 3940; GFX11-NEXT: s_pack_ll_b32_b16 s1, s1, s3 3941; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 3942; GFX11-NEXT: s_or_b32 s0, s0, s1 3943; GFX11-NEXT: ; return to shader part epilog 3944 %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) 3945 %cast = bitcast <2 x i16> %result to i32 3946 ret i32 %cast 3947} 3948 3949define <2 x i16> @v_fshr_v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) { 3950; GFX6-LABEL: v_fshr_v2i16: 3951; GFX6: ; %bb.0: 3952; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3953; GFX6-NEXT: v_lshlrev_b32_e32 v5, 16, v5 3954; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4 3955; GFX6-NEXT: v_or_b32_e32 v4, v5, v4 3956; GFX6-NEXT: s_bfe_u32 s4, 1, 0x100000 3957; GFX6-NEXT: v_bfe_u32 v5, v2, 1, 15 3958; GFX6-NEXT: s_bfe_u32 s5, 14, 0x100000 3959; GFX6-NEXT: v_lshlrev_b32_e32 v0, s4, v0 3960; GFX6-NEXT: v_lshrrev_b32_e32 v5, s5, v5 3961; GFX6-NEXT: v_or_b32_e32 v0, v0, v5 3962; GFX6-NEXT: v_bfe_u32 v5, v3, 1, 15 3963; GFX6-NEXT: v_lshlrev_b32_e32 v1, s4, v1 3964; GFX6-NEXT: v_lshrrev_b32_e32 v5, s5, v5 3965; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4 3966; GFX6-NEXT: v_or_b32_e32 v1, v1, v5 3967; GFX6-NEXT: v_lshrrev_b32_e32 v5, 16, v4 3968; GFX6-NEXT: v_and_b32_e32 v6, 15, v4 3969; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4 3970; GFX6-NEXT: v_lshlrev_b32_e32 v2, 1, v2 3971; GFX6-NEXT: v_and_b32_e32 v4, 15, v4 3972; GFX6-NEXT: v_bfe_u32 v6, v6, 0, 16 3973; GFX6-NEXT: v_bfe_u32 v2, v2, 1, 15 3974; GFX6-NEXT: v_bfe_u32 v4, v4, 0, 16 3975; GFX6-NEXT: v_lshlrev_b32_e32 v0, v6, v0 3976; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2 3977; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 3978; GFX6-NEXT: v_and_b32_e32 v2, 15, v5 3979; GFX6-NEXT: 
v_xor_b32_e32 v4, -1, v5 3980; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3 3981; GFX6-NEXT: v_and_b32_e32 v4, 15, v4 3982; GFX6-NEXT: v_bfe_u32 v2, v2, 0, 16 3983; GFX6-NEXT: v_lshlrev_b32_e32 v1, v2, v1 3984; GFX6-NEXT: v_bfe_u32 v2, v3, 1, 15 3985; GFX6-NEXT: v_bfe_u32 v3, v4, 0, 16 3986; GFX6-NEXT: v_lshrrev_b32_e32 v2, v3, v2 3987; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 3988; GFX6-NEXT: s_setpc_b64 s[30:31] 3989; 3990; GFX8-LABEL: v_fshr_v2i16: 3991; GFX8: ; %bb.0: 3992; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3993; GFX8-NEXT: v_lshlrev_b16_e32 v3, 1, v0 3994; GFX8-NEXT: v_lshrrev_b16_e32 v4, 15, v1 3995; GFX8-NEXT: v_or_b32_e32 v3, v3, v4 3996; GFX8-NEXT: v_mov_b32_e32 v4, 1 3997; GFX8-NEXT: v_mov_b32_e32 v5, 15 3998; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 3999; GFX8-NEXT: v_lshrrev_b16_sdwa v5, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4000; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2 4001; GFX8-NEXT: v_or_b32_e32 v0, v0, v5 4002; GFX8-NEXT: v_lshlrev_b16_e32 v5, 1, v1 4003; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4004; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v2 4005; GFX8-NEXT: v_and_b32_e32 v6, 15, v2 4006; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2 4007; GFX8-NEXT: v_and_b32_e32 v2, 15, v2 4008; GFX8-NEXT: v_lshrrev_b16_e32 v5, 1, v5 4009; GFX8-NEXT: v_lshlrev_b16_e32 v3, v6, v3 4010; GFX8-NEXT: v_lshrrev_b16_e32 v2, v2, v5 4011; GFX8-NEXT: v_or_b32_e32 v2, v3, v2 4012; GFX8-NEXT: v_and_b32_e32 v3, 15, v4 4013; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4 4014; GFX8-NEXT: v_and_b32_e32 v4, 15, v4 4015; GFX8-NEXT: v_lshrrev_b16_e32 v1, 1, v1 4016; GFX8-NEXT: v_lshlrev_b16_e32 v0, v3, v0 4017; GFX8-NEXT: v_lshrrev_b16_e32 v1, v4, v1 4018; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 4019; GFX8-NEXT: v_mov_b32_e32 v1, 16 4020; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD 
src1_sel:WORD_0 4021; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 4022; GFX8-NEXT: s_setpc_b64 s[30:31] 4023; 4024; GFX9-LABEL: v_fshr_v2i16: 4025; GFX9: ; %bb.0: 4026; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4027; GFX9-NEXT: v_and_b32_e32 v3, 0xf000f, v2 4028; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2 4029; GFX9-NEXT: v_and_b32_e32 v2, 0xf000f, v2 4030; GFX9-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] 4031; GFX9-NEXT: v_pk_lshlrev_b16 v0, v2, v0 4032; GFX9-NEXT: v_pk_lshrrev_b16 v1, v3, v1 4033; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 4034; GFX9-NEXT: s_setpc_b64 s[30:31] 4035; 4036; GFX10-LABEL: v_fshr_v2i16: 4037; GFX10: ; %bb.0: 4038; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4039; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 4040; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2 4041; GFX10-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] 4042; GFX10-NEXT: v_and_b32_e32 v2, 0xf000f, v2 4043; GFX10-NEXT: v_and_b32_e32 v3, 0xf000f, v3 4044; GFX10-NEXT: v_pk_lshrrev_b16 v1, v2, v1 4045; GFX10-NEXT: v_pk_lshlrev_b16 v0, v3, v0 4046; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 4047; GFX10-NEXT: s_setpc_b64 s[30:31] 4048; 4049; GFX11-LABEL: v_fshr_v2i16: 4050; GFX11: ; %bb.0: 4051; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4052; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4053; GFX11-NEXT: v_xor_b32_e32 v3, -1, v2 4054; GFX11-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] 4055; GFX11-NEXT: v_and_b32_e32 v2, 0xf000f, v2 4056; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 4057; GFX11-NEXT: v_and_b32_e32 v3, 0xf000f, v3 4058; GFX11-NEXT: v_pk_lshrrev_b16 v1, v2, v1 4059; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 4060; GFX11-NEXT: v_pk_lshlrev_b16 v0, v3, v0 4061; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 4062; GFX11-NEXT: s_setpc_b64 s[30:31] 4063 %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) 4064 ret 
<2 x i16> %result 4065} 4066 4067define <2 x i16> @v_fshr_v2i16_4_8(<2 x i16> %lhs, <2 x i16> %rhs) { 4068; GFX6-LABEL: v_fshr_v2i16_4_8: 4069; GFX6: ; %bb.0: 4070; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4071; GFX6-NEXT: s_bfe_u32 s4, 12, 0x100000 4072; GFX6-NEXT: v_lshlrev_b32_e32 v0, s4, v0 4073; GFX6-NEXT: v_bfe_u32 v2, v2, 1, 15 4074; GFX6-NEXT: s_bfe_u32 s4, 3, 0x100000 4075; GFX6-NEXT: v_lshrrev_b32_e32 v2, s4, v2 4076; GFX6-NEXT: s_bfe_u32 s4, 8, 0x100000 4077; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 4078; GFX6-NEXT: v_lshlrev_b32_e32 v1, s4, v1 4079; GFX6-NEXT: v_bfe_u32 v2, v3, 1, 15 4080; GFX6-NEXT: s_bfe_u32 s4, 7, 0x100000 4081; GFX6-NEXT: v_lshrrev_b32_e32 v2, s4, v2 4082; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 4083; GFX6-NEXT: s_setpc_b64 s[30:31] 4084; 4085; GFX8-LABEL: v_fshr_v2i16_4_8: 4086; GFX8: ; %bb.0: 4087; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4088; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v0 4089; GFX8-NEXT: v_lshlrev_b16_e32 v0, 12, v0 4090; GFX8-NEXT: v_lshrrev_b16_e32 v3, 4, v1 4091; GFX8-NEXT: v_or_b32_e32 v0, v0, v3 4092; GFX8-NEXT: v_mov_b32_e32 v3, 8 4093; GFX8-NEXT: v_lshlrev_b16_e32 v2, 8, v2 4094; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4095; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 4096; GFX8-NEXT: v_mov_b32_e32 v2, 16 4097; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 4098; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 4099; GFX8-NEXT: s_setpc_b64 s[30:31] 4100; 4101; GFX9-LABEL: v_fshr_v2i16_4_8: 4102; GFX9: ; %bb.0: 4103; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4104; GFX9-NEXT: v_mov_b32_e32 v2, 0x8000c 4105; GFX9-NEXT: v_pk_lshlrev_b16 v0, v2, v0 4106; GFX9-NEXT: v_mov_b32_e32 v2, 0x80004 4107; GFX9-NEXT: v_pk_lshrrev_b16 v1, v2, v1 4108; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 4109; GFX9-NEXT: s_setpc_b64 s[30:31] 4110; 4111; 
GFX10-LABEL: v_fshr_v2i16_4_8: 4112; GFX10: ; %bb.0: 4113; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4114; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 4115; GFX10-NEXT: v_pk_lshlrev_b16 v0, 0x8000c, v0 4116; GFX10-NEXT: v_pk_lshrrev_b16 v1, 0x80004, v1 4117; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 4118; GFX10-NEXT: s_setpc_b64 s[30:31] 4119; 4120; GFX11-LABEL: v_fshr_v2i16_4_8: 4121; GFX11: ; %bb.0: 4122; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4123; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4124; GFX11-NEXT: v_pk_lshlrev_b16 v0, 0x8000c, v0 4125; GFX11-NEXT: v_pk_lshrrev_b16 v1, 0x80004, v1 4126; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 4127; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 4128; GFX11-NEXT: s_setpc_b64 s[30:31] 4129 %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> <i16 4, i16 8>) 4130 ret <2 x i16> %result 4131} 4132 4133define amdgpu_ps float @v_fshr_v2i16_ssv(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <2 x i16> %amt) { 4134; GFX6-LABEL: v_fshr_v2i16_ssv: 4135; GFX6: ; %bb.0: 4136; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 4137; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 4138; GFX6-NEXT: v_or_b32_e32 v0, v1, v0 4139; GFX6-NEXT: s_bfe_u32 s4, 1, 0x100000 4140; GFX6-NEXT: s_bfe_u32 s5, s2, 0xf0001 4141; GFX6-NEXT: s_bfe_u32 s6, 14, 0x100000 4142; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0 4143; GFX6-NEXT: s_lshl_b32 s0, s0, s4 4144; GFX6-NEXT: s_lshr_b32 s5, s5, s6 4145; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0 4146; GFX6-NEXT: v_and_b32_e32 v2, 15, v0 4147; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0 4148; GFX6-NEXT: s_or_b32 s0, s0, s5 4149; GFX6-NEXT: s_lshl_b32 s2, s2, 1 4150; GFX6-NEXT: v_and_b32_e32 v0, 15, v0 4151; GFX6-NEXT: v_bfe_u32 v2, v2, 0, 16 4152; GFX6-NEXT: v_lshl_b32_e32 v2, s0, v2 4153; GFX6-NEXT: s_bfe_u32 s0, s2, 0xf0001 4154; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16 4155; GFX6-NEXT: v_lshr_b32_e32 v0, s0, v0 4156; GFX6-NEXT: s_lshl_b32 s1, s1, s4 4157; GFX6-NEXT: s_bfe_u32 s4, s3, 0xf0001 4158; GFX6-NEXT: v_or_b32_e32 
v0, v2, v0 4159; GFX6-NEXT: v_and_b32_e32 v2, 15, v1 4160; GFX6-NEXT: v_xor_b32_e32 v1, -1, v1 4161; GFX6-NEXT: s_lshr_b32 s4, s4, s6 4162; GFX6-NEXT: s_lshl_b32 s3, s3, 1 4163; GFX6-NEXT: v_and_b32_e32 v1, 15, v1 4164; GFX6-NEXT: s_or_b32 s1, s1, s4 4165; GFX6-NEXT: v_bfe_u32 v2, v2, 0, 16 4166; GFX6-NEXT: s_bfe_u32 s0, s3, 0xf0001 4167; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16 4168; GFX6-NEXT: v_lshl_b32_e32 v2, s1, v2 4169; GFX6-NEXT: v_lshr_b32_e32 v1, s0, v1 4170; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 4171; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16 4172; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16 4173; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 4174; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 4175; GFX6-NEXT: ; return to shader part epilog 4176; 4177; GFX8-LABEL: v_fshr_v2i16_ssv: 4178; GFX8: ; %bb.0: 4179; GFX8-NEXT: s_bfe_u32 s4, 1, 0x100000 4180; GFX8-NEXT: s_bfe_u32 s5, s1, 0x100000 4181; GFX8-NEXT: s_bfe_u32 s6, 15, 0x100000 4182; GFX8-NEXT: s_lshr_b32 s2, s0, 16 4183; GFX8-NEXT: s_lshl_b32 s0, s0, s4 4184; GFX8-NEXT: s_lshr_b32 s5, s5, s6 4185; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0 4186; GFX8-NEXT: s_lshr_b32 s3, s1, 16 4187; GFX8-NEXT: s_or_b32 s0, s0, s5 4188; GFX8-NEXT: s_lshl_b32 s1, s1, s4 4189; GFX8-NEXT: v_and_b32_e32 v2, 15, v0 4190; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v0 4191; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0 4192; GFX8-NEXT: v_lshlrev_b16_e64 v2, v2, s0 4193; GFX8-NEXT: s_bfe_u32 s0, s1, 0x100000 4194; GFX8-NEXT: v_and_b32_e32 v0, 15, v0 4195; GFX8-NEXT: s_lshr_b32 s0, s0, s4 4196; GFX8-NEXT: s_lshr_b32 s5, s3, s6 4197; GFX8-NEXT: s_lshl_b32 s3, s3, s4 4198; GFX8-NEXT: v_lshrrev_b16_e64 v0, v0, s0 4199; GFX8-NEXT: s_lshl_b32 s2, s2, s4 4200; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 4201; GFX8-NEXT: v_and_b32_e32 v2, 15, v1 4202; GFX8-NEXT: v_xor_b32_e32 v1, -1, v1 4203; GFX8-NEXT: s_bfe_u32 s0, s3, 0x100000 4204; GFX8-NEXT: s_or_b32 s2, s2, s5 4205; GFX8-NEXT: v_and_b32_e32 v1, 15, v1 4206; GFX8-NEXT: s_lshr_b32 s0, s0, s4 4207; GFX8-NEXT: v_lshlrev_b16_e64 v2, v2, s2 4208; 
GFX8-NEXT: v_lshrrev_b16_e64 v1, v1, s0 4209; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 4210; GFX8-NEXT: v_mov_b32_e32 v2, 16 4211; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 4212; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 4213; GFX8-NEXT: ; return to shader part epilog 4214; 4215; GFX9-LABEL: v_fshr_v2i16_ssv: 4216; GFX9: ; %bb.0: 4217; GFX9-NEXT: s_lshr_b32 s2, s0, 16 4218; GFX9-NEXT: v_and_b32_e32 v1, 0xf000f, v0 4219; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0 4220; GFX9-NEXT: s_lshl_b32 s0, s0, 0x10001 4221; GFX9-NEXT: s_lshl_b32 s2, s2, 1 4222; GFX9-NEXT: v_and_b32_e32 v0, 0xf000f, v0 4223; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2 4224; GFX9-NEXT: v_pk_lshlrev_b16 v0, v0, s0 4225; GFX9-NEXT: v_pk_lshrrev_b16 v1, v1, s1 4226; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 4227; GFX9-NEXT: ; return to shader part epilog 4228; 4229; GFX10-LABEL: v_fshr_v2i16_ssv: 4230; GFX10: ; %bb.0: 4231; GFX10-NEXT: v_xor_b32_e32 v1, -1, v0 4232; GFX10-NEXT: s_lshr_b32 s2, s0, 16 4233; GFX10-NEXT: v_and_b32_e32 v0, 0xf000f, v0 4234; GFX10-NEXT: s_lshl_b32 s0, s0, 0x10001 4235; GFX10-NEXT: s_lshl_b32 s2, s2, 1 4236; GFX10-NEXT: v_and_b32_e32 v1, 0xf000f, v1 4237; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s2 4238; GFX10-NEXT: v_pk_lshrrev_b16 v0, v0, s1 4239; GFX10-NEXT: v_pk_lshlrev_b16 v1, v1, s0 4240; GFX10-NEXT: v_or_b32_e32 v0, v1, v0 4241; GFX10-NEXT: ; return to shader part epilog 4242; 4243; GFX11-LABEL: v_fshr_v2i16_ssv: 4244; GFX11: ; %bb.0: 4245; GFX11-NEXT: v_xor_b32_e32 v1, -1, v0 4246; GFX11-NEXT: s_lshr_b32 s2, s0, 16 4247; GFX11-NEXT: v_and_b32_e32 v0, 0xf000f, v0 4248; GFX11-NEXT: s_lshl_b32 s0, s0, 0x10001 4249; GFX11-NEXT: s_lshl_b32 s2, s2, 1 4250; GFX11-NEXT: v_and_b32_e32 v1, 0xf000f, v1 4251; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s2 4252; GFX11-NEXT: v_pk_lshrrev_b16 v0, v0, s1 4253; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | 
instid1(VALU_DEP_1) 4254; GFX11-NEXT: v_pk_lshlrev_b16 v1, v1, s0 4255; GFX11-NEXT: v_or_b32_e32 v0, v1, v0 4256; GFX11-NEXT: ; return to shader part epilog 4257 %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) 4258 %cast = bitcast <2 x i16> %result to float 4259 ret float %cast 4260} 4261 4262define amdgpu_ps float @v_fshr_v2i16_svs(<2 x i16> inreg %lhs, <2 x i16> %rhs, <2 x i16> inreg %amt) { 4263; GFX6-LABEL: v_fshr_v2i16_svs: 4264; GFX6: ; %bb.0: 4265; GFX6-NEXT: s_lshl_b32 s3, s3, 16 4266; GFX6-NEXT: s_and_b32 s2, s2, 0xffff 4267; GFX6-NEXT: s_or_b32 s2, s3, s2 4268; GFX6-NEXT: s_bfe_u32 s3, 1, 0x100000 4269; GFX6-NEXT: v_bfe_u32 v2, v0, 1, 15 4270; GFX6-NEXT: s_bfe_u32 s4, 14, 0x100000 4271; GFX6-NEXT: s_lshl_b32 s0, s0, s3 4272; GFX6-NEXT: v_lshrrev_b32_e32 v2, s4, v2 4273; GFX6-NEXT: v_bfe_u32 v3, v1, 1, 15 4274; GFX6-NEXT: v_or_b32_e32 v2, s0, v2 4275; GFX6-NEXT: s_lshl_b32 s0, s1, s3 4276; GFX6-NEXT: v_lshrrev_b32_e32 v3, s4, v3 4277; GFX6-NEXT: v_or_b32_e32 v3, s0, v3 4278; GFX6-NEXT: s_xor_b32 s0, s2, -1 4279; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 4280; GFX6-NEXT: s_lshr_b32 s1, s0, 16 4281; GFX6-NEXT: s_and_b32 s2, s0, 15 4282; GFX6-NEXT: s_andn2_b32 s0, 15, s0 4283; GFX6-NEXT: v_bfe_u32 v0, v0, 1, 15 4284; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000 4285; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000 4286; GFX6-NEXT: v_lshrrev_b32_e32 v0, s0, v0 4287; GFX6-NEXT: s_and_b32 s0, s1, 15 4288; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1 4289; GFX6-NEXT: v_lshlrev_b32_e32 v2, s2, v2 4290; GFX6-NEXT: s_andn2_b32 s1, 15, s1 4291; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000 4292; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 4293; GFX6-NEXT: v_lshlrev_b32_e32 v2, s0, v3 4294; GFX6-NEXT: v_bfe_u32 v1, v1, 1, 15 4295; GFX6-NEXT: s_bfe_u32 s0, s1, 0x100000 4296; GFX6-NEXT: v_lshrrev_b32_e32 v1, s0, v1 4297; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 4298; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16 4299; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16 4300; GFX6-NEXT: v_lshlrev_b32_e32 
v1, 16, v1 4301; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 4302; GFX6-NEXT: ; return to shader part epilog 4303; 4304; GFX8-LABEL: v_fshr_v2i16_svs: 4305; GFX8: ; %bb.0: 4306; GFX8-NEXT: s_bfe_u32 s3, 1, 0x100000 4307; GFX8-NEXT: s_lshr_b32 s2, s0, 16 4308; GFX8-NEXT: s_lshl_b32 s0, s0, s3 4309; GFX8-NEXT: v_lshrrev_b16_e32 v1, 15, v0 4310; GFX8-NEXT: v_mov_b32_e32 v2, 15 4311; GFX8-NEXT: v_or_b32_e32 v1, s0, v1 4312; GFX8-NEXT: s_lshl_b32 s0, s2, s3 4313; GFX8-NEXT: v_lshrrev_b16_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4314; GFX8-NEXT: v_or_b32_e32 v2, s0, v2 4315; GFX8-NEXT: v_lshlrev_b16_e32 v3, 1, v0 4316; GFX8-NEXT: v_mov_b32_e32 v4, 1 4317; GFX8-NEXT: s_xor_b32 s0, s1, -1 4318; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4319; GFX8-NEXT: s_lshr_b32 s1, s0, 16 4320; GFX8-NEXT: s_and_b32 s2, s0, 15 4321; GFX8-NEXT: s_andn2_b32 s0, 15, s0 4322; GFX8-NEXT: v_lshrrev_b16_e32 v3, 1, v3 4323; GFX8-NEXT: v_lshrrev_b16_e32 v3, s0, v3 4324; GFX8-NEXT: s_and_b32 s0, s1, 15 4325; GFX8-NEXT: s_andn2_b32 s1, 15, s1 4326; GFX8-NEXT: v_lshrrev_b16_e32 v0, 1, v0 4327; GFX8-NEXT: v_lshlrev_b16_e32 v2, s0, v2 4328; GFX8-NEXT: v_lshrrev_b16_e32 v0, s1, v0 4329; GFX8-NEXT: v_lshlrev_b16_e32 v1, s2, v1 4330; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 4331; GFX8-NEXT: v_mov_b32_e32 v2, 16 4332; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 4333; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 4334; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 4335; GFX8-NEXT: ; return to shader part epilog 4336; 4337; GFX9-LABEL: v_fshr_v2i16_svs: 4338; GFX9: ; %bb.0: 4339; GFX9-NEXT: s_lshr_b32 s3, s0, 16 4340; GFX9-NEXT: s_lshl_b32 s0, s0, 0x10001 4341; GFX9-NEXT: s_lshl_b32 s3, s3, 1 4342; GFX9-NEXT: s_and_b32 s2, s1, 0xf000f 4343; GFX9-NEXT: s_andn2_b32 s1, 0xf000f, s1 4344; GFX9-NEXT: 
s_pack_ll_b32_b16 s0, s0, s3 4345; GFX9-NEXT: s_lshr_b32 s3, s0, 16 4346; GFX9-NEXT: s_lshr_b32 s4, s1, 16 4347; GFX9-NEXT: s_lshl_b32 s0, s0, s1 4348; GFX9-NEXT: s_lshl_b32 s1, s3, s4 4349; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s1 4350; GFX9-NEXT: v_pk_lshrrev_b16 v0, s2, v0 4351; GFX9-NEXT: v_or_b32_e32 v0, s0, v0 4352; GFX9-NEXT: ; return to shader part epilog 4353; 4354; GFX10-LABEL: v_fshr_v2i16_svs: 4355; GFX10: ; %bb.0: 4356; GFX10-NEXT: s_lshr_b32 s2, s0, 16 4357; GFX10-NEXT: s_lshl_b32 s0, s0, 0x10001 4358; GFX10-NEXT: s_lshl_b32 s2, s2, 1 4359; GFX10-NEXT: s_and_b32 s3, s1, 0xf000f 4360; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s2 4361; GFX10-NEXT: s_andn2_b32 s1, 0xf000f, s1 4362; GFX10-NEXT: s_lshr_b32 s2, s0, 16 4363; GFX10-NEXT: s_lshr_b32 s4, s1, 16 4364; GFX10-NEXT: v_pk_lshrrev_b16 v0, s3, v0 4365; GFX10-NEXT: s_lshl_b32 s0, s0, s1 4366; GFX10-NEXT: s_lshl_b32 s1, s2, s4 4367; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s1 4368; GFX10-NEXT: v_or_b32_e32 v0, s0, v0 4369; GFX10-NEXT: ; return to shader part epilog 4370; 4371; GFX11-LABEL: v_fshr_v2i16_svs: 4372; GFX11: ; %bb.0: 4373; GFX11-NEXT: s_lshr_b32 s2, s0, 16 4374; GFX11-NEXT: s_lshl_b32 s0, s0, 0x10001 4375; GFX11-NEXT: s_lshl_b32 s2, s2, 1 4376; GFX11-NEXT: s_and_b32 s3, s1, 0xf000f 4377; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s2 4378; GFX11-NEXT: s_and_not1_b32 s1, 0xf000f, s1 4379; GFX11-NEXT: s_lshr_b32 s2, s0, 16 4380; GFX11-NEXT: s_lshr_b32 s4, s1, 16 4381; GFX11-NEXT: v_pk_lshrrev_b16 v0, s3, v0 4382; GFX11-NEXT: s_lshl_b32 s0, s0, s1 4383; GFX11-NEXT: s_lshl_b32 s1, s2, s4 4384; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 4385; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s1 4386; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 4387; GFX11-NEXT: v_or_b32_e32 v0, s0, v0 4388; GFX11-NEXT: ; return to shader part epilog 4389 %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) 4390 %cast = bitcast <2 x i16> %result to float 4391 ret float %cast 
4392} 4393 4394define amdgpu_ps float @v_fshr_v2i16_vss(<2 x i16> %lhs, <2 x i16> inreg %rhs, <2 x i16> inreg %amt) { 4395; GFX6-LABEL: v_fshr_v2i16_vss: 4396; GFX6: ; %bb.0: 4397; GFX6-NEXT: s_lshl_b32 s3, s3, 16 4398; GFX6-NEXT: s_and_b32 s2, s2, 0xffff 4399; GFX6-NEXT: s_or_b32 s2, s3, s2 4400; GFX6-NEXT: s_bfe_u32 s3, 1, 0x100000 4401; GFX6-NEXT: v_lshlrev_b32_e32 v0, s3, v0 4402; GFX6-NEXT: s_bfe_u32 s4, s0, 0xf0001 4403; GFX6-NEXT: s_bfe_u32 s5, 14, 0x100000 4404; GFX6-NEXT: v_lshlrev_b32_e32 v1, s3, v1 4405; GFX6-NEXT: s_bfe_u32 s3, s1, 0xf0001 4406; GFX6-NEXT: s_lshr_b32 s4, s4, s5 4407; GFX6-NEXT: s_lshr_b32 s3, s3, s5 4408; GFX6-NEXT: s_xor_b32 s2, s2, -1 4409; GFX6-NEXT: v_or_b32_e32 v0, s4, v0 4410; GFX6-NEXT: v_or_b32_e32 v1, s3, v1 4411; GFX6-NEXT: s_lshl_b32 s0, s0, 1 4412; GFX6-NEXT: s_lshr_b32 s3, s2, 16 4413; GFX6-NEXT: s_and_b32 s4, s2, 15 4414; GFX6-NEXT: s_andn2_b32 s2, 15, s2 4415; GFX6-NEXT: s_bfe_u32 s4, s4, 0x100000 4416; GFX6-NEXT: s_bfe_u32 s0, s0, 0xf0001 4417; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000 4418; GFX6-NEXT: v_lshlrev_b32_e32 v0, s4, v0 4419; GFX6-NEXT: s_lshr_b32 s0, s0, s2 4420; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 4421; GFX6-NEXT: s_and_b32 s0, s3, 15 4422; GFX6-NEXT: s_lshl_b32 s1, s1, 1 4423; GFX6-NEXT: s_andn2_b32 s2, 15, s3 4424; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000 4425; GFX6-NEXT: v_lshlrev_b32_e32 v1, s0, v1 4426; GFX6-NEXT: s_bfe_u32 s0, s1, 0xf0001 4427; GFX6-NEXT: s_bfe_u32 s1, s2, 0x100000 4428; GFX6-NEXT: s_lshr_b32 s0, s0, s1 4429; GFX6-NEXT: v_or_b32_e32 v1, s0, v1 4430; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16 4431; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16 4432; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 4433; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 4434; GFX6-NEXT: ; return to shader part epilog 4435; 4436; GFX8-LABEL: v_fshr_v2i16_vss: 4437; GFX8: ; %bb.0: 4438; GFX8-NEXT: s_bfe_u32 s3, s0, 0x100000 4439; GFX8-NEXT: s_bfe_u32 s4, 15, 0x100000 4440; GFX8-NEXT: s_lshr_b32 s2, s0, 16 4441; GFX8-NEXT: v_lshlrev_b16_e32 v1, 1, v0 4442; 
GFX8-NEXT: s_lshr_b32 s3, s3, s4 4443; GFX8-NEXT: v_mov_b32_e32 v2, 1 4444; GFX8-NEXT: v_or_b32_e32 v1, s3, v1 4445; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4446; GFX8-NEXT: s_lshr_b32 s3, s2, s4 4447; GFX8-NEXT: v_or_b32_e32 v0, s3, v0 4448; GFX8-NEXT: s_bfe_u32 s3, 1, 0x100000 4449; GFX8-NEXT: s_lshl_b32 s0, s0, s3 4450; GFX8-NEXT: s_xor_b32 s1, s1, -1 4451; GFX8-NEXT: s_lshr_b32 s4, s1, 16 4452; GFX8-NEXT: s_and_b32 s5, s1, 15 4453; GFX8-NEXT: s_andn2_b32 s1, 15, s1 4454; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000 4455; GFX8-NEXT: s_lshr_b32 s0, s0, s3 4456; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 4457; GFX8-NEXT: v_lshlrev_b16_e32 v1, s5, v1 4458; GFX8-NEXT: s_lshr_b32 s0, s0, s1 4459; GFX8-NEXT: s_lshl_b32 s2, s2, s3 4460; GFX8-NEXT: v_or_b32_e32 v1, s0, v1 4461; GFX8-NEXT: s_and_b32 s0, s4, 15 4462; GFX8-NEXT: s_andn2_b32 s1, 15, s4 4463; GFX8-NEXT: v_lshlrev_b16_e32 v0, s0, v0 4464; GFX8-NEXT: s_bfe_u32 s0, s2, 0x100000 4465; GFX8-NEXT: s_lshr_b32 s0, s0, s3 4466; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 4467; GFX8-NEXT: s_lshr_b32 s0, s0, s1 4468; GFX8-NEXT: v_or_b32_e32 v0, s0, v0 4469; GFX8-NEXT: v_mov_b32_e32 v2, 16 4470; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 4471; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 4472; GFX8-NEXT: ; return to shader part epilog 4473; 4474; GFX9-LABEL: v_fshr_v2i16_vss: 4475; GFX9: ; %bb.0: 4476; GFX9-NEXT: s_and_b32 s2, s1, 0xf000f 4477; GFX9-NEXT: s_andn2_b32 s1, 0xf000f, s1 4478; GFX9-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] 4479; GFX9-NEXT: v_pk_lshlrev_b16 v0, s1, v0 4480; GFX9-NEXT: s_lshr_b32 s1, s0, 16 4481; GFX9-NEXT: s_and_b32 s0, s0, 0xffff 4482; GFX9-NEXT: s_lshr_b32 s3, s2, 16 4483; GFX9-NEXT: s_lshr_b32 s0, s0, s2 4484; GFX9-NEXT: s_lshr_b32 s1, s1, s3 4485; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s1 4486; GFX9-NEXT: 
v_or_b32_e32 v0, s0, v0 4487; GFX9-NEXT: ; return to shader part epilog 4488; 4489; GFX10-LABEL: v_fshr_v2i16_vss: 4490; GFX10: ; %bb.0: 4491; GFX10-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] 4492; GFX10-NEXT: s_and_b32 s2, s1, 0xf000f 4493; GFX10-NEXT: s_andn2_b32 s1, 0xf000f, s1 4494; GFX10-NEXT: s_lshr_b32 s3, s0, 16 4495; GFX10-NEXT: s_and_b32 s0, s0, 0xffff 4496; GFX10-NEXT: s_lshr_b32 s4, s2, 16 4497; GFX10-NEXT: v_pk_lshlrev_b16 v0, s1, v0 4498; GFX10-NEXT: s_lshr_b32 s0, s0, s2 4499; GFX10-NEXT: s_lshr_b32 s1, s3, s4 4500; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s1 4501; GFX10-NEXT: v_or_b32_e32 v0, s0, v0 4502; GFX10-NEXT: ; return to shader part epilog 4503; 4504; GFX11-LABEL: v_fshr_v2i16_vss: 4505; GFX11: ; %bb.0: 4506; GFX11-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] 4507; GFX11-NEXT: s_and_b32 s2, s1, 0xf000f 4508; GFX11-NEXT: s_and_not1_b32 s1, 0xf000f, s1 4509; GFX11-NEXT: s_lshr_b32 s3, s0, 16 4510; GFX11-NEXT: s_and_b32 s0, s0, 0xffff 4511; GFX11-NEXT: s_lshr_b32 s4, s2, 16 4512; GFX11-NEXT: v_pk_lshlrev_b16 v0, s1, v0 4513; GFX11-NEXT: s_lshr_b32 s0, s0, s2 4514; GFX11-NEXT: s_lshr_b32 s1, s3, s4 4515; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 4516; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s1 4517; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 4518; GFX11-NEXT: v_or_b32_e32 v0, s0, v0 4519; GFX11-NEXT: ; return to shader part epilog 4520 %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) 4521 %cast = bitcast <2 x i16> %result to float 4522 ret float %cast 4523} 4524 4525; ; FIXME 4526; define amdgpu_ps i48 @s_fshr_v3i16(<3 x i16> inreg %lhs, <3 x i16> inreg %rhs, <3 x i16> inreg %amt) { 4527; %result = call <3 x i16> @llvm.fshr.v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) 4528; %cast = bitcast <3 x i16> %result to i48 4529; ret i48 %cast 4530; } 4531 4532; ; FIXME 4533; define <3 x half> @v_fshr_v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) { 4534; %result = 
call <3 x i16> @llvm.fshr.v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) 4535; %cast.result = bitcast <3 x i16> %result to <3 x half> 4536; ret <3 x half> %cast.result 4537; } 4538 4539define amdgpu_ps <2 x i32> @s_fshr_v4i16(<4 x i16> inreg %lhs, <4 x i16> inreg %rhs, <4 x i16> inreg %amt) { 4540; GFX6-LABEL: s_fshr_v4i16: 4541; GFX6: ; %bb.0: 4542; GFX6-NEXT: s_lshl_b32 s9, s9, 16 4543; GFX6-NEXT: s_and_b32 s8, s8, 0xffff 4544; GFX6-NEXT: s_or_b32 s8, s9, s8 4545; GFX6-NEXT: s_lshl_b32 s9, s11, 16 4546; GFX6-NEXT: s_and_b32 s10, s10, 0xffff 4547; GFX6-NEXT: s_or_b32 s9, s9, s10 4548; GFX6-NEXT: s_bfe_u32 s10, 1, 0x100000 4549; GFX6-NEXT: s_bfe_u32 s11, s4, 0xf0001 4550; GFX6-NEXT: s_bfe_u32 s12, 14, 0x100000 4551; GFX6-NEXT: s_lshl_b32 s0, s0, s10 4552; GFX6-NEXT: s_lshr_b32 s11, s11, s12 4553; GFX6-NEXT: s_or_b32 s0, s0, s11 4554; GFX6-NEXT: s_bfe_u32 s11, s5, 0xf0001 4555; GFX6-NEXT: s_lshl_b32 s1, s1, s10 4556; GFX6-NEXT: s_lshr_b32 s11, s11, s12 4557; GFX6-NEXT: s_xor_b32 s8, s8, -1 4558; GFX6-NEXT: s_or_b32 s1, s1, s11 4559; GFX6-NEXT: s_lshl_b32 s4, s4, 1 4560; GFX6-NEXT: s_lshr_b32 s11, s8, 16 4561; GFX6-NEXT: s_and_b32 s13, s8, 15 4562; GFX6-NEXT: s_andn2_b32 s8, 15, s8 4563; GFX6-NEXT: s_bfe_u32 s13, s13, 0x100000 4564; GFX6-NEXT: s_bfe_u32 s4, s4, 0xf0001 4565; GFX6-NEXT: s_bfe_u32 s8, s8, 0x100000 4566; GFX6-NEXT: s_lshl_b32 s0, s0, s13 4567; GFX6-NEXT: s_lshr_b32 s4, s4, s8 4568; GFX6-NEXT: s_or_b32 s0, s0, s4 4569; GFX6-NEXT: s_and_b32 s4, s11, 15 4570; GFX6-NEXT: s_lshl_b32 s5, s5, 1 4571; GFX6-NEXT: s_andn2_b32 s8, 15, s11 4572; GFX6-NEXT: s_bfe_u32 s4, s4, 0x100000 4573; GFX6-NEXT: s_lshl_b32 s1, s1, s4 4574; GFX6-NEXT: s_bfe_u32 s4, s5, 0xf0001 4575; GFX6-NEXT: s_bfe_u32 s5, s8, 0x100000 4576; GFX6-NEXT: s_lshr_b32 s4, s4, s5 4577; GFX6-NEXT: s_or_b32 s1, s1, s4 4578; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000 4579; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000 4580; GFX6-NEXT: s_lshl_b32 s1, s1, 16 4581; GFX6-NEXT: s_or_b32 s0, s0, s1 4582; GFX6-NEXT: 
s_lshl_b32 s1, s2, s10 4583; GFX6-NEXT: s_bfe_u32 s2, s6, 0xf0001 4584; GFX6-NEXT: s_lshr_b32 s2, s2, s12 4585; GFX6-NEXT: s_or_b32 s1, s1, s2 4586; GFX6-NEXT: s_lshl_b32 s2, s3, s10 4587; GFX6-NEXT: s_bfe_u32 s3, s7, 0xf0001 4588; GFX6-NEXT: s_lshr_b32 s3, s3, s12 4589; GFX6-NEXT: s_xor_b32 s5, s9, -1 4590; GFX6-NEXT: s_or_b32 s2, s2, s3 4591; GFX6-NEXT: s_lshl_b32 s3, s6, 1 4592; GFX6-NEXT: s_lshl_b32 s4, s7, 1 4593; GFX6-NEXT: s_lshr_b32 s6, s5, 16 4594; GFX6-NEXT: s_and_b32 s7, s5, 15 4595; GFX6-NEXT: s_andn2_b32 s5, 15, s5 4596; GFX6-NEXT: s_bfe_u32 s7, s7, 0x100000 4597; GFX6-NEXT: s_bfe_u32 s3, s3, 0xf0001 4598; GFX6-NEXT: s_bfe_u32 s5, s5, 0x100000 4599; GFX6-NEXT: s_lshl_b32 s1, s1, s7 4600; GFX6-NEXT: s_lshr_b32 s3, s3, s5 4601; GFX6-NEXT: s_or_b32 s1, s1, s3 4602; GFX6-NEXT: s_and_b32 s3, s6, 15 4603; GFX6-NEXT: s_andn2_b32 s5, 15, s6 4604; GFX6-NEXT: s_bfe_u32 s3, s3, 0x100000 4605; GFX6-NEXT: s_lshl_b32 s2, s2, s3 4606; GFX6-NEXT: s_bfe_u32 s3, s4, 0xf0001 4607; GFX6-NEXT: s_bfe_u32 s4, s5, 0x100000 4608; GFX6-NEXT: s_lshr_b32 s3, s3, s4 4609; GFX6-NEXT: s_or_b32 s2, s2, s3 4610; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000 4611; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000 4612; GFX6-NEXT: s_lshl_b32 s2, s2, 16 4613; GFX6-NEXT: s_or_b32 s1, s1, s2 4614; GFX6-NEXT: ; return to shader part epilog 4615; 4616; GFX8-LABEL: s_fshr_v4i16: 4617; GFX8: ; %bb.0: 4618; GFX8-NEXT: s_bfe_u32 s8, 1, 0x100000 4619; GFX8-NEXT: s_bfe_u32 s9, s2, 0x100000 4620; GFX8-NEXT: s_bfe_u32 s10, 15, 0x100000 4621; GFX8-NEXT: s_lshr_b32 s6, s0, 16 4622; GFX8-NEXT: s_lshr_b32 s7, s2, 16 4623; GFX8-NEXT: s_lshl_b32 s0, s0, s8 4624; GFX8-NEXT: s_lshr_b32 s9, s9, s10 4625; GFX8-NEXT: s_or_b32 s0, s0, s9 4626; GFX8-NEXT: s_lshl_b32 s6, s6, s8 4627; GFX8-NEXT: s_lshr_b32 s9, s7, s10 4628; GFX8-NEXT: s_lshl_b32 s2, s2, s8 4629; GFX8-NEXT: s_xor_b32 s4, s4, -1 4630; GFX8-NEXT: s_or_b32 s6, s6, s9 4631; GFX8-NEXT: s_lshr_b32 s9, s4, 16 4632; GFX8-NEXT: s_and_b32 s11, s4, 15 4633; GFX8-NEXT: 
s_andn2_b32 s4, 15, s4 4634; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 4635; GFX8-NEXT: s_bfe_u32 s11, s11, 0x100000 4636; GFX8-NEXT: s_lshr_b32 s2, s2, s8 4637; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000 4638; GFX8-NEXT: s_lshl_b32 s0, s0, s11 4639; GFX8-NEXT: s_lshr_b32 s2, s2, s4 4640; GFX8-NEXT: s_or_b32 s0, s0, s2 4641; GFX8-NEXT: s_and_b32 s2, s9, 15 4642; GFX8-NEXT: s_lshl_b32 s7, s7, s8 4643; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 4644; GFX8-NEXT: s_andn2_b32 s4, 15, s9 4645; GFX8-NEXT: s_lshl_b32 s2, s6, s2 4646; GFX8-NEXT: s_bfe_u32 s6, s7, 0x100000 4647; GFX8-NEXT: s_lshr_b32 s6, s6, s8 4648; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000 4649; GFX8-NEXT: s_lshr_b32 s4, s6, s4 4650; GFX8-NEXT: s_or_b32 s2, s2, s4 4651; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 4652; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000 4653; GFX8-NEXT: s_lshl_b32 s2, s2, 16 4654; GFX8-NEXT: s_bfe_u32 s6, s3, 0x100000 4655; GFX8-NEXT: s_or_b32 s0, s0, s2 4656; GFX8-NEXT: s_lshr_b32 s2, s1, 16 4657; GFX8-NEXT: s_lshr_b32 s4, s3, 16 4658; GFX8-NEXT: s_lshl_b32 s1, s1, s8 4659; GFX8-NEXT: s_lshr_b32 s6, s6, s10 4660; GFX8-NEXT: s_or_b32 s1, s1, s6 4661; GFX8-NEXT: s_lshl_b32 s2, s2, s8 4662; GFX8-NEXT: s_lshr_b32 s6, s4, s10 4663; GFX8-NEXT: s_lshl_b32 s3, s3, s8 4664; GFX8-NEXT: s_xor_b32 s5, s5, -1 4665; GFX8-NEXT: s_or_b32 s2, s2, s6 4666; GFX8-NEXT: s_lshr_b32 s6, s5, 16 4667; GFX8-NEXT: s_and_b32 s7, s5, 15 4668; GFX8-NEXT: s_andn2_b32 s5, 15, s5 4669; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000 4670; GFX8-NEXT: s_bfe_u32 s7, s7, 0x100000 4671; GFX8-NEXT: s_lshr_b32 s3, s3, s8 4672; GFX8-NEXT: s_bfe_u32 s5, s5, 0x100000 4673; GFX8-NEXT: s_lshl_b32 s1, s1, s7 4674; GFX8-NEXT: s_lshr_b32 s3, s3, s5 4675; GFX8-NEXT: s_or_b32 s1, s1, s3 4676; GFX8-NEXT: s_and_b32 s3, s6, 15 4677; GFX8-NEXT: s_lshl_b32 s4, s4, s8 4678; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000 4679; GFX8-NEXT: s_andn2_b32 s5, 15, s6 4680; GFX8-NEXT: s_lshl_b32 s2, s2, s3 4681; GFX8-NEXT: s_bfe_u32 s3, s4, 0x100000 4682; GFX8-NEXT: s_lshr_b32 s3, s3, s8 
4683; GFX8-NEXT: s_bfe_u32 s4, s5, 0x100000 4684; GFX8-NEXT: s_lshr_b32 s3, s3, s4 4685; GFX8-NEXT: s_or_b32 s2, s2, s3 4686; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 4687; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 4688; GFX8-NEXT: s_lshl_b32 s2, s2, 16 4689; GFX8-NEXT: s_or_b32 s1, s1, s2 4690; GFX8-NEXT: ; return to shader part epilog 4691; 4692; GFX9-LABEL: s_fshr_v4i16: 4693; GFX9: ; %bb.0: 4694; GFX9-NEXT: s_lshr_b32 s7, s0, 16 4695; GFX9-NEXT: s_lshl_b32 s0, s0, 0x10001 4696; GFX9-NEXT: s_lshl_b32 s7, s7, 1 4697; GFX9-NEXT: s_and_b32 s6, s4, 0xf000f 4698; GFX9-NEXT: s_andn2_b32 s4, 0xf000f, s4 4699; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s7 4700; GFX9-NEXT: s_lshr_b32 s7, s0, 16 4701; GFX9-NEXT: s_lshr_b32 s8, s4, 16 4702; GFX9-NEXT: s_lshl_b32 s0, s0, s4 4703; GFX9-NEXT: s_lshl_b32 s4, s7, s8 4704; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s4 4705; GFX9-NEXT: s_lshr_b32 s4, s2, 16 4706; GFX9-NEXT: s_and_b32 s2, s2, 0xffff 4707; GFX9-NEXT: s_lshr_b32 s7, s6, 16 4708; GFX9-NEXT: s_lshr_b32 s2, s2, s6 4709; GFX9-NEXT: s_lshr_b32 s4, s4, s7 4710; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s4 4711; GFX9-NEXT: s_or_b32 s0, s0, s2 4712; GFX9-NEXT: s_and_b32 s2, s5, 0xf000f 4713; GFX9-NEXT: s_andn2_b32 s4, 0xf000f, s5 4714; GFX9-NEXT: s_lshr_b32 s5, s1, 16 4715; GFX9-NEXT: s_lshl_b32 s1, s1, 0x10001 4716; GFX9-NEXT: s_lshl_b32 s5, s5, 1 4717; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s5 4718; GFX9-NEXT: s_lshr_b32 s5, s1, 16 4719; GFX9-NEXT: s_lshr_b32 s6, s4, 16 4720; GFX9-NEXT: s_lshl_b32 s1, s1, s4 4721; GFX9-NEXT: s_lshl_b32 s4, s5, s6 4722; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s4 4723; GFX9-NEXT: s_lshr_b32 s4, s3, 16 4724; GFX9-NEXT: s_and_b32 s3, s3, 0xffff 4725; GFX9-NEXT: s_lshr_b32 s5, s2, 16 4726; GFX9-NEXT: s_lshr_b32 s2, s3, s2 4727; GFX9-NEXT: s_lshr_b32 s3, s4, s5 4728; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s3 4729; GFX9-NEXT: s_or_b32 s1, s1, s2 4730; GFX9-NEXT: ; return to shader part epilog 4731; 4732; GFX10-LABEL: s_fshr_v4i16: 4733; GFX10: ; %bb.0: 4734; GFX10-NEXT: 
s_lshr_b32 s6, s0, 16 4735; GFX10-NEXT: s_lshl_b32 s0, s0, 0x10001 4736; GFX10-NEXT: s_lshl_b32 s6, s6, 1 4737; GFX10-NEXT: s_and_b32 s7, s4, 0xf000f 4738; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s6 4739; GFX10-NEXT: s_andn2_b32 s4, 0xf000f, s4 4740; GFX10-NEXT: s_lshr_b32 s6, s0, 16 4741; GFX10-NEXT: s_lshr_b32 s8, s4, 16 4742; GFX10-NEXT: s_lshl_b32 s0, s0, s4 4743; GFX10-NEXT: s_lshl_b32 s4, s6, s8 4744; GFX10-NEXT: s_lshr_b32 s6, s2, 16 4745; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s4 4746; GFX10-NEXT: s_lshr_b32 s4, s1, 16 4747; GFX10-NEXT: s_and_b32 s2, s2, 0xffff 4748; GFX10-NEXT: s_lshr_b32 s8, s7, 16 4749; GFX10-NEXT: s_lshl_b32 s1, s1, 0x10001 4750; GFX10-NEXT: s_lshl_b32 s4, s4, 1 4751; GFX10-NEXT: s_lshr_b32 s2, s2, s7 4752; GFX10-NEXT: s_lshr_b32 s6, s6, s8 4753; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s4 4754; GFX10-NEXT: s_andn2_b32 s4, 0xf000f, s5 4755; GFX10-NEXT: s_pack_ll_b32_b16 s2, s2, s6 4756; GFX10-NEXT: s_and_b32 s6, s5, 0xf000f 4757; GFX10-NEXT: s_lshr_b32 s5, s1, 16 4758; GFX10-NEXT: s_lshr_b32 s7, s4, 16 4759; GFX10-NEXT: s_lshl_b32 s1, s1, s4 4760; GFX10-NEXT: s_lshl_b32 s4, s5, s7 4761; GFX10-NEXT: s_lshr_b32 s5, s3, 16 4762; GFX10-NEXT: s_and_b32 s3, s3, 0xffff 4763; GFX10-NEXT: s_lshr_b32 s7, s6, 16 4764; GFX10-NEXT: s_lshr_b32 s3, s3, s6 4765; GFX10-NEXT: s_lshr_b32 s5, s5, s7 4766; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s4 4767; GFX10-NEXT: s_pack_ll_b32_b16 s3, s3, s5 4768; GFX10-NEXT: s_or_b32 s0, s0, s2 4769; GFX10-NEXT: s_or_b32 s1, s1, s3 4770; GFX10-NEXT: ; return to shader part epilog 4771; 4772; GFX11-LABEL: s_fshr_v4i16: 4773; GFX11: ; %bb.0: 4774; GFX11-NEXT: s_lshr_b32 s6, s0, 16 4775; GFX11-NEXT: s_lshl_b32 s0, s0, 0x10001 4776; GFX11-NEXT: s_lshl_b32 s6, s6, 1 4777; GFX11-NEXT: s_and_b32 s7, s4, 0xf000f 4778; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s6 4779; GFX11-NEXT: s_and_not1_b32 s4, 0xf000f, s4 4780; GFX11-NEXT: s_lshr_b32 s6, s0, 16 4781; GFX11-NEXT: s_lshr_b32 s8, s4, 16 4782; GFX11-NEXT: s_lshl_b32 s0, s0, s4 4783; 
GFX11-NEXT: s_lshl_b32 s4, s6, s8 4784; GFX11-NEXT: s_lshr_b32 s6, s2, 16 4785; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s4 4786; GFX11-NEXT: s_lshr_b32 s4, s1, 16 4787; GFX11-NEXT: s_and_b32 s2, s2, 0xffff 4788; GFX11-NEXT: s_lshr_b32 s8, s7, 16 4789; GFX11-NEXT: s_lshl_b32 s1, s1, 0x10001 4790; GFX11-NEXT: s_lshl_b32 s4, s4, 1 4791; GFX11-NEXT: s_lshr_b32 s2, s2, s7 4792; GFX11-NEXT: s_lshr_b32 s6, s6, s8 4793; GFX11-NEXT: s_pack_ll_b32_b16 s1, s1, s4 4794; GFX11-NEXT: s_and_not1_b32 s4, 0xf000f, s5 4795; GFX11-NEXT: s_pack_ll_b32_b16 s2, s2, s6 4796; GFX11-NEXT: s_and_b32 s6, s5, 0xf000f 4797; GFX11-NEXT: s_lshr_b32 s5, s1, 16 4798; GFX11-NEXT: s_lshr_b32 s7, s4, 16 4799; GFX11-NEXT: s_lshl_b32 s1, s1, s4 4800; GFX11-NEXT: s_lshl_b32 s4, s5, s7 4801; GFX11-NEXT: s_lshr_b32 s5, s3, 16 4802; GFX11-NEXT: s_and_b32 s3, s3, 0xffff 4803; GFX11-NEXT: s_lshr_b32 s7, s6, 16 4804; GFX11-NEXT: s_lshr_b32 s3, s3, s6 4805; GFX11-NEXT: s_lshr_b32 s5, s5, s7 4806; GFX11-NEXT: s_pack_ll_b32_b16 s1, s1, s4 4807; GFX11-NEXT: s_pack_ll_b32_b16 s3, s3, s5 4808; GFX11-NEXT: s_or_b32 s0, s0, s2 4809; GFX11-NEXT: s_or_b32 s1, s1, s3 4810; GFX11-NEXT: ; return to shader part epilog 4811 %result = call <4 x i16> @llvm.fshr.v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt) 4812 %cast.result = bitcast <4 x i16> %result to <2 x i32> 4813 ret <2 x i32> %cast.result 4814} 4815 4816define <4 x half> @v_fshr_v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt) { 4817; GFX6-LABEL: v_fshr_v4i16: 4818; GFX6: ; %bb.0: 4819; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4820; GFX6-NEXT: v_lshlrev_b32_e32 v9, 16, v9 4821; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v8 4822; GFX6-NEXT: v_or_b32_e32 v8, v9, v8 4823; GFX6-NEXT: v_lshlrev_b32_e32 v9, 16, v11 4824; GFX6-NEXT: v_and_b32_e32 v10, 0xffff, v10 4825; GFX6-NEXT: v_or_b32_e32 v9, v9, v10 4826; GFX6-NEXT: s_bfe_u32 s4, 1, 0x100000 4827; GFX6-NEXT: v_bfe_u32 v10, v4, 1, 15 4828; GFX6-NEXT: s_bfe_u32 s5, 14, 0x100000 4829; GFX6-NEXT: 
v_lshlrev_b32_e32 v0, s4, v0 4830; GFX6-NEXT: v_lshrrev_b32_e32 v10, s5, v10 4831; GFX6-NEXT: v_or_b32_e32 v0, v0, v10 4832; GFX6-NEXT: v_bfe_u32 v10, v5, 1, 15 4833; GFX6-NEXT: v_lshlrev_b32_e32 v1, s4, v1 4834; GFX6-NEXT: v_lshrrev_b32_e32 v10, s5, v10 4835; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8 4836; GFX6-NEXT: v_or_b32_e32 v1, v1, v10 4837; GFX6-NEXT: v_lshrrev_b32_e32 v10, 16, v8 4838; GFX6-NEXT: v_and_b32_e32 v11, 15, v8 4839; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8 4840; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v4 4841; GFX6-NEXT: v_and_b32_e32 v8, 15, v8 4842; GFX6-NEXT: v_bfe_u32 v11, v11, 0, 16 4843; GFX6-NEXT: v_bfe_u32 v4, v4, 1, 15 4844; GFX6-NEXT: v_bfe_u32 v8, v8, 0, 16 4845; GFX6-NEXT: v_lshlrev_b32_e32 v0, v11, v0 4846; GFX6-NEXT: v_lshrrev_b32_e32 v4, v8, v4 4847; GFX6-NEXT: v_or_b32_e32 v0, v0, v4 4848; GFX6-NEXT: v_and_b32_e32 v4, 15, v10 4849; GFX6-NEXT: v_xor_b32_e32 v8, -1, v10 4850; GFX6-NEXT: v_lshlrev_b32_e32 v5, 1, v5 4851; GFX6-NEXT: v_and_b32_e32 v8, 15, v8 4852; GFX6-NEXT: v_bfe_u32 v4, v4, 0, 16 4853; GFX6-NEXT: v_lshlrev_b32_e32 v1, v4, v1 4854; GFX6-NEXT: v_bfe_u32 v4, v5, 1, 15 4855; GFX6-NEXT: v_bfe_u32 v5, v8, 0, 16 4856; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4 4857; GFX6-NEXT: v_or_b32_e32 v1, v1, v4 4858; GFX6-NEXT: v_bfe_u32 v4, v6, 1, 15 4859; GFX6-NEXT: v_lshlrev_b32_e32 v2, s4, v2 4860; GFX6-NEXT: v_lshrrev_b32_e32 v4, s5, v4 4861; GFX6-NEXT: v_or_b32_e32 v2, v2, v4 4862; GFX6-NEXT: v_bfe_u32 v4, v7, 1, 15 4863; GFX6-NEXT: v_lshlrev_b32_e32 v3, s4, v3 4864; GFX6-NEXT: v_lshrrev_b32_e32 v4, s5, v4 4865; GFX6-NEXT: v_or_b32_e32 v3, v3, v4 4866; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v6 4867; GFX6-NEXT: v_xor_b32_e32 v6, -1, v9 4868; GFX6-NEXT: v_lshlrev_b32_e32 v5, 1, v7 4869; GFX6-NEXT: v_lshrrev_b32_e32 v7, 16, v6 4870; GFX6-NEXT: v_and_b32_e32 v8, 15, v6 4871; GFX6-NEXT: v_xor_b32_e32 v6, -1, v6 4872; GFX6-NEXT: v_and_b32_e32 v6, 15, v6 4873; GFX6-NEXT: v_bfe_u32 v8, v8, 0, 16 4874; GFX6-NEXT: v_bfe_u32 v4, v4, 1, 15 4875; GFX6-NEXT: 
v_bfe_u32 v6, v6, 0, 16 4876; GFX6-NEXT: v_lshlrev_b32_e32 v2, v8, v2 4877; GFX6-NEXT: v_lshrrev_b32_e32 v4, v6, v4 4878; GFX6-NEXT: v_or_b32_e32 v2, v2, v4 4879; GFX6-NEXT: v_and_b32_e32 v4, 15, v7 4880; GFX6-NEXT: v_xor_b32_e32 v6, -1, v7 4881; GFX6-NEXT: v_and_b32_e32 v6, 15, v6 4882; GFX6-NEXT: v_bfe_u32 v4, v4, 0, 16 4883; GFX6-NEXT: v_lshlrev_b32_e32 v3, v4, v3 4884; GFX6-NEXT: v_bfe_u32 v4, v5, 1, 15 4885; GFX6-NEXT: v_bfe_u32 v5, v6, 0, 16 4886; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4 4887; GFX6-NEXT: v_or_b32_e32 v3, v3, v4 4888; GFX6-NEXT: s_setpc_b64 s[30:31] 4889; 4890; GFX8-LABEL: v_fshr_v4i16: 4891; GFX8: ; %bb.0: 4892; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4893; GFX8-NEXT: v_lshlrev_b16_e32 v6, 1, v0 4894; GFX8-NEXT: v_lshrrev_b16_e32 v7, 15, v2 4895; GFX8-NEXT: v_or_b32_e32 v6, v6, v7 4896; GFX8-NEXT: v_mov_b32_e32 v7, 1 4897; GFX8-NEXT: v_mov_b32_e32 v8, 15 4898; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4899; GFX8-NEXT: v_lshrrev_b16_sdwa v9, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4900; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4 4901; GFX8-NEXT: v_or_b32_e32 v0, v0, v9 4902; GFX8-NEXT: v_lshlrev_b16_e32 v9, 1, v2 4903; GFX8-NEXT: v_lshlrev_b16_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4904; GFX8-NEXT: v_lshrrev_b32_e32 v7, 16, v4 4905; GFX8-NEXT: v_and_b32_e32 v10, 15, v4 4906; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4 4907; GFX8-NEXT: v_and_b32_e32 v4, 15, v4 4908; GFX8-NEXT: v_lshrrev_b16_e32 v9, 1, v9 4909; GFX8-NEXT: v_lshlrev_b16_e32 v6, v10, v6 4910; GFX8-NEXT: v_lshrrev_b16_e32 v4, v4, v9 4911; GFX8-NEXT: v_or_b32_e32 v4, v6, v4 4912; GFX8-NEXT: v_and_b32_e32 v6, 15, v7 4913; GFX8-NEXT: v_xor_b32_e32 v7, -1, v7 4914; GFX8-NEXT: v_and_b32_e32 v7, 15, v7 4915; GFX8-NEXT: v_lshrrev_b16_e32 v2, 1, v2 4916; GFX8-NEXT: v_lshlrev_b16_e32 v0, v6, v0 4917; GFX8-NEXT: v_lshrrev_b16_e32 v2, v7, v2 4918; 
GFX8-NEXT: v_or_b32_e32 v0, v0, v2 4919; GFX8-NEXT: v_mov_b32_e32 v2, 16 4920; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 4921; GFX8-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 4922; GFX8-NEXT: v_lshlrev_b16_e32 v4, 1, v1 4923; GFX8-NEXT: v_lshrrev_b16_e32 v6, 15, v3 4924; GFX8-NEXT: v_or_b32_e32 v4, v4, v6 4925; GFX8-NEXT: v_mov_b32_e32 v6, 1 4926; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4927; GFX8-NEXT: v_lshrrev_b16_sdwa v7, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4928; GFX8-NEXT: v_xor_b32_e32 v5, -1, v5 4929; GFX8-NEXT: v_or_b32_e32 v1, v1, v7 4930; GFX8-NEXT: v_lshlrev_b16_e32 v7, 1, v3 4931; GFX8-NEXT: v_lshlrev_b16_sdwa v3, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4932; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v5 4933; GFX8-NEXT: v_and_b32_e32 v8, 15, v5 4934; GFX8-NEXT: v_xor_b32_e32 v5, -1, v5 4935; GFX8-NEXT: v_and_b32_e32 v5, 15, v5 4936; GFX8-NEXT: v_lshrrev_b16_e32 v7, 1, v7 4937; GFX8-NEXT: v_lshlrev_b16_e32 v4, v8, v4 4938; GFX8-NEXT: v_lshrrev_b16_e32 v5, v5, v7 4939; GFX8-NEXT: v_or_b32_e32 v4, v4, v5 4940; GFX8-NEXT: v_and_b32_e32 v5, 15, v6 4941; GFX8-NEXT: v_xor_b32_e32 v6, -1, v6 4942; GFX8-NEXT: v_and_b32_e32 v6, 15, v6 4943; GFX8-NEXT: v_lshrrev_b16_e32 v3, 1, v3 4944; GFX8-NEXT: v_lshlrev_b16_e32 v1, v5, v1 4945; GFX8-NEXT: v_lshrrev_b16_e32 v3, v6, v3 4946; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 4947; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 4948; GFX8-NEXT: v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 4949; GFX8-NEXT: s_setpc_b64 s[30:31] 4950; 4951; GFX9-LABEL: v_fshr_v4i16: 4952; GFX9: ; %bb.0: 4953; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4954; GFX9-NEXT: v_and_b32_e32 
v6, 0xf000f, v4 4955; GFX9-NEXT: v_xor_b32_e32 v4, -1, v4 4956; GFX9-NEXT: v_and_b32_e32 v4, 0xf000f, v4 4957; GFX9-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] 4958; GFX9-NEXT: v_pk_lshlrev_b16 v0, v4, v0 4959; GFX9-NEXT: v_pk_lshrrev_b16 v2, v6, v2 4960; GFX9-NEXT: v_xor_b32_e32 v4, -1, v5 4961; GFX9-NEXT: v_or_b32_e32 v0, v0, v2 4962; GFX9-NEXT: v_and_b32_e32 v2, 0xf000f, v5 4963; GFX9-NEXT: v_and_b32_e32 v4, 0xf000f, v4 4964; GFX9-NEXT: v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1] 4965; GFX9-NEXT: v_pk_lshlrev_b16 v1, v4, v1 4966; GFX9-NEXT: v_pk_lshrrev_b16 v2, v2, v3 4967; GFX9-NEXT: v_or_b32_e32 v1, v1, v2 4968; GFX9-NEXT: s_setpc_b64 s[30:31] 4969; 4970; GFX10-LABEL: v_fshr_v4i16: 4971; GFX10: ; %bb.0: 4972; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4973; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 4974; GFX10-NEXT: v_xor_b32_e32 v6, -1, v4 4975; GFX10-NEXT: v_xor_b32_e32 v7, -1, v5 4976; GFX10-NEXT: v_and_b32_e32 v4, 0xf000f, v4 4977; GFX10-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] 4978; GFX10-NEXT: v_and_b32_e32 v5, 0xf000f, v5 4979; GFX10-NEXT: v_and_b32_e32 v6, 0xf000f, v6 4980; GFX10-NEXT: v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1] 4981; GFX10-NEXT: v_and_b32_e32 v7, 0xf000f, v7 4982; GFX10-NEXT: v_pk_lshrrev_b16 v2, v4, v2 4983; GFX10-NEXT: v_pk_lshrrev_b16 v3, v5, v3 4984; GFX10-NEXT: v_pk_lshlrev_b16 v0, v6, v0 4985; GFX10-NEXT: v_pk_lshlrev_b16 v1, v7, v1 4986; GFX10-NEXT: v_or_b32_e32 v0, v0, v2 4987; GFX10-NEXT: v_or_b32_e32 v1, v1, v3 4988; GFX10-NEXT: s_setpc_b64 s[30:31] 4989; 4990; GFX11-LABEL: v_fshr_v4i16: 4991; GFX11: ; %bb.0: 4992; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4993; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 4994; GFX11-NEXT: v_xor_b32_e32 v6, -1, v4 4995; GFX11-NEXT: v_xor_b32_e32 v7, -1, v5 4996; GFX11-NEXT: v_and_b32_e32 v4, 0xf000f, v4 4997; GFX11-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] 4998; GFX11-NEXT: v_and_b32_e32 v5, 0xf000f, v5 4999; GFX11-NEXT: v_and_b32_e32 v6, 0xf000f, v6 5000; 
GFX11-NEXT: v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1] 5001; GFX11-NEXT: v_and_b32_e32 v7, 0xf000f, v7 5002; GFX11-NEXT: v_pk_lshrrev_b16 v2, v4, v2 5003; GFX11-NEXT: v_pk_lshrrev_b16 v3, v5, v3 5004; GFX11-NEXT: v_pk_lshlrev_b16 v0, v6, v0 5005; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) 5006; GFX11-NEXT: v_pk_lshlrev_b16 v1, v7, v1 5007; GFX11-NEXT: v_or_b32_e32 v0, v0, v2 5008; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 5009; GFX11-NEXT: v_or_b32_e32 v1, v1, v3 5010; GFX11-NEXT: s_setpc_b64 s[30:31] 5011 %result = call <4 x i16> @llvm.fshr.v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt) 5012 %cast.result = bitcast <4 x i16> %result to <4 x half> 5013 ret <4 x half> %cast.result 5014} 5015 5016define amdgpu_ps i64 @s_fshr_i64(i64 inreg %lhs, i64 inreg %rhs, i64 inreg %amt) { 5017; GFX6-LABEL: s_fshr_i64: 5018; GFX6: ; %bb.0: 5019; GFX6-NEXT: s_and_b64 s[6:7], s[4:5], 63 5020; GFX6-NEXT: s_andn2_b64 s[4:5], 63, s[4:5] 5021; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5022; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s4 5023; GFX6-NEXT: s_lshr_b64 s[2:3], s[2:3], s6 5024; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 5025; GFX6-NEXT: ; return to shader part epilog 5026; 5027; GFX8-LABEL: s_fshr_i64: 5028; GFX8: ; %bb.0: 5029; GFX8-NEXT: s_and_b64 s[6:7], s[4:5], 63 5030; GFX8-NEXT: s_andn2_b64 s[4:5], 63, s[4:5] 5031; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5032; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s4 5033; GFX8-NEXT: s_lshr_b64 s[2:3], s[2:3], s6 5034; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 5035; GFX8-NEXT: ; return to shader part epilog 5036; 5037; GFX9-LABEL: s_fshr_i64: 5038; GFX9: ; %bb.0: 5039; GFX9-NEXT: s_and_b64 s[6:7], s[4:5], 63 5040; GFX9-NEXT: s_andn2_b64 s[4:5], 63, s[4:5] 5041; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5042; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s4 5043; GFX9-NEXT: s_lshr_b64 s[2:3], s[2:3], s6 5044; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 5045; GFX9-NEXT: ; return to shader part epilog 5046; 
5047; GFX10-LABEL: s_fshr_i64: 5048; GFX10: ; %bb.0: 5049; GFX10-NEXT: s_andn2_b64 s[6:7], 63, s[4:5] 5050; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5051; GFX10-NEXT: s_and_b64 s[4:5], s[4:5], 63 5052; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s6 5053; GFX10-NEXT: s_lshr_b64 s[2:3], s[2:3], s4 5054; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 5055; GFX10-NEXT: ; return to shader part epilog 5056; 5057; GFX11-LABEL: s_fshr_i64: 5058; GFX11: ; %bb.0: 5059; GFX11-NEXT: s_and_not1_b64 s[6:7], 63, s[4:5] 5060; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5061; GFX11-NEXT: s_and_b64 s[4:5], s[4:5], 63 5062; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s6 5063; GFX11-NEXT: s_lshr_b64 s[2:3], s[2:3], s4 5064; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 5065; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 5066; GFX11-NEXT: ; return to shader part epilog 5067 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt) 5068 ret i64 %result 5069} 5070 5071define amdgpu_ps i64 @s_fshr_i64_5(i64 inreg %lhs, i64 inreg %rhs) { 5072; GCN-LABEL: s_fshr_i64_5: 5073; GCN: ; %bb.0: 5074; GCN-NEXT: s_lshl_b32 s1, s0, 27 5075; GCN-NEXT: s_mov_b32 s0, 0 5076; GCN-NEXT: s_lshr_b64 s[2:3], s[2:3], 5 5077; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 5078; GCN-NEXT: ; return to shader part epilog 5079; 5080; GFX11-LABEL: s_fshr_i64_5: 5081; GFX11: ; %bb.0: 5082; GFX11-NEXT: s_lshl_b32 s1, s0, 27 5083; GFX11-NEXT: s_mov_b32 s0, 0 5084; GFX11-NEXT: s_lshr_b64 s[2:3], s[2:3], 5 5085; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 5086; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 5087; GFX11-NEXT: ; return to shader part epilog 5088 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 5) 5089 ret i64 %result 5090} 5091 5092define amdgpu_ps i64 @s_fshr_i64_32(i64 inreg %lhs, i64 inreg %rhs) { 5093; GCN-LABEL: s_fshr_i64_32: 5094; GCN: ; %bb.0: 5095; GCN-NEXT: s_mov_b32 s1, s0 5096; GCN-NEXT: s_mov_b32 s0, 0 5097; GCN-NEXT: s_mov_b32 s2, s3 5098; GCN-NEXT: s_mov_b32 s3, s0 5099; GCN-NEXT: s_or_b64 s[0:1], 
s[0:1], s[2:3] 5100; GCN-NEXT: ; return to shader part epilog 5101; 5102; GFX11-LABEL: s_fshr_i64_32: 5103; GFX11: ; %bb.0: 5104; GFX11-NEXT: s_mov_b32 s1, s0 5105; GFX11-NEXT: s_mov_b32 s0, 0 5106; GFX11-NEXT: s_mov_b32 s2, s3 5107; GFX11-NEXT: s_mov_b32 s3, s0 5108; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 5109; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 5110; GFX11-NEXT: ; return to shader part epilog 5111 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 32) 5112 ret i64 %result 5113} 5114 5115define amdgpu_ps i64 @s_fshr_i64_48(i64 inreg %lhs, i64 inreg %rhs) { 5116; GCN-LABEL: s_fshr_i64_48: 5117; GCN: ; %bb.0: 5118; GCN-NEXT: s_lshl_b64 s[0:1], s[0:1], 16 5119; GCN-NEXT: s_lshr_b32 s2, s3, 16 5120; GCN-NEXT: s_mov_b32 s3, 0 5121; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 5122; GCN-NEXT: ; return to shader part epilog 5123; 5124; GFX11-LABEL: s_fshr_i64_48: 5125; GFX11: ; %bb.0: 5126; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 16 5127; GFX11-NEXT: s_lshr_b32 s2, s3, 16 5128; GFX11-NEXT: s_mov_b32 s3, 0 5129; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 5130; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 5131; GFX11-NEXT: ; return to shader part epilog 5132 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 48) 5133 ret i64 %result 5134} 5135 5136define i64 @v_fshr_i64(i64 %lhs, i64 %rhs, i64 %amt) { 5137; GFX6-LABEL: v_fshr_i64: 5138; GFX6: ; %bb.0: 5139; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5140; GFX6-NEXT: v_and_b32_e32 v5, 63, v4 5141; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4 5142; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 1 5143; GFX6-NEXT: v_and_b32_e32 v4, 63, v4 5144; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v4 5145; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], v5 5146; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 5147; GFX6-NEXT: v_or_b32_e32 v1, v1, v3 5148; GFX6-NEXT: s_setpc_b64 s[30:31] 5149; 5150; GFX8-LABEL: v_fshr_i64: 5151; GFX8: ; %bb.0: 5152; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5153; GFX8-NEXT: v_and_b32_e32 v5, 63, v4 5154; 
GFX8-NEXT: v_xor_b32_e32 v4, -1, v4 5155; GFX8-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 5156; GFX8-NEXT: v_and_b32_e32 v4, 63, v4 5157; GFX8-NEXT: v_lshlrev_b64 v[0:1], v4, v[0:1] 5158; GFX8-NEXT: v_lshrrev_b64 v[2:3], v5, v[2:3] 5159; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 5160; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 5161; GFX8-NEXT: s_setpc_b64 s[30:31] 5162; 5163; GFX9-LABEL: v_fshr_i64: 5164; GFX9: ; %bb.0: 5165; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5166; GFX9-NEXT: v_and_b32_e32 v5, 63, v4 5167; GFX9-NEXT: v_xor_b32_e32 v4, -1, v4 5168; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 5169; GFX9-NEXT: v_and_b32_e32 v4, 63, v4 5170; GFX9-NEXT: v_lshlrev_b64 v[0:1], v4, v[0:1] 5171; GFX9-NEXT: v_lshrrev_b64 v[2:3], v5, v[2:3] 5172; GFX9-NEXT: v_or_b32_e32 v0, v0, v2 5173; GFX9-NEXT: v_or_b32_e32 v1, v1, v3 5174; GFX9-NEXT: s_setpc_b64 s[30:31] 5175; 5176; GFX10-LABEL: v_fshr_i64: 5177; GFX10: ; %bb.0: 5178; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5179; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 5180; GFX10-NEXT: v_xor_b32_e32 v5, -1, v4 5181; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 5182; GFX10-NEXT: v_and_b32_e32 v4, 63, v4 5183; GFX10-NEXT: v_and_b32_e32 v5, 63, v5 5184; GFX10-NEXT: v_lshrrev_b64 v[2:3], v4, v[2:3] 5185; GFX10-NEXT: v_lshlrev_b64 v[0:1], v5, v[0:1] 5186; GFX10-NEXT: v_or_b32_e32 v0, v0, v2 5187; GFX10-NEXT: v_or_b32_e32 v1, v1, v3 5188; GFX10-NEXT: s_setpc_b64 s[30:31] 5189; 5190; GFX11-LABEL: v_fshr_i64: 5191; GFX11: ; %bb.0: 5192; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5193; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 5194; GFX11-NEXT: v_xor_b32_e32 v5, -1, v4 5195; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 5196; GFX11-NEXT: v_and_b32_e32 v4, 63, v4 5197; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 5198; GFX11-NEXT: v_and_b32_e32 v5, 63, v5 5199; GFX11-NEXT: v_lshrrev_b64 v[2:3], v4, v[2:3] 5200; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 5201; 
GFX11-NEXT: v_lshlrev_b64 v[0:1], v5, v[0:1] 5202; GFX11-NEXT: v_or_b32_e32 v0, v0, v2 5203; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 5204; GFX11-NEXT: v_or_b32_e32 v1, v1, v3 5205; GFX11-NEXT: s_setpc_b64 s[30:31] 5206 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt) 5207 ret i64 %result 5208} 5209 5210define i64 @v_fshr_i64_5(i64 %lhs, i64 %rhs) { 5211; GFX6-LABEL: v_fshr_i64_5: 5212; GFX6: ; %bb.0: 5213; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5214; GFX6-NEXT: v_mov_b32_e32 v4, v0 5215; GFX6-NEXT: v_lshr_b64 v[0:1], v[2:3], 5 5216; GFX6-NEXT: v_lshlrev_b32_e32 v2, 27, v4 5217; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 5218; GFX6-NEXT: s_setpc_b64 s[30:31] 5219; 5220; GFX8-LABEL: v_fshr_i64_5: 5221; GFX8: ; %bb.0: 5222; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5223; GFX8-NEXT: v_mov_b32_e32 v4, v0 5224; GFX8-NEXT: v_lshrrev_b64 v[0:1], 5, v[2:3] 5225; GFX8-NEXT: v_lshlrev_b32_e32 v2, 27, v4 5226; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 5227; GFX8-NEXT: s_setpc_b64 s[30:31] 5228; 5229; GFX9-LABEL: v_fshr_i64_5: 5230; GFX9: ; %bb.0: 5231; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5232; GFX9-NEXT: v_mov_b32_e32 v4, v0 5233; GFX9-NEXT: v_lshrrev_b64 v[0:1], 5, v[2:3] 5234; GFX9-NEXT: v_lshlrev_b32_e32 v2, 27, v4 5235; GFX9-NEXT: v_or_b32_e32 v1, v2, v1 5236; GFX9-NEXT: s_setpc_b64 s[30:31] 5237; 5238; GFX10-LABEL: v_fshr_i64_5: 5239; GFX10: ; %bb.0: 5240; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5241; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 5242; GFX10-NEXT: v_mov_b32_e32 v4, v0 5243; GFX10-NEXT: v_lshrrev_b64 v[0:1], 5, v[2:3] 5244; GFX10-NEXT: v_lshlrev_b32_e32 v2, 27, v4 5245; GFX10-NEXT: v_or_b32_e32 v1, v2, v1 5246; GFX10-NEXT: s_setpc_b64 s[30:31] 5247; 5248; GFX11-LABEL: v_fshr_i64_5: 5249; GFX11: ; %bb.0: 5250; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5251; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 5252; GFX11-NEXT: v_mov_b32_e32 v4, v0 5253; GFX11-NEXT: v_lshrrev_b64 v[0:1], 5, v[2:3] 5254; GFX11-NEXT: 
s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 5255; GFX11-NEXT: v_lshlrev_b32_e32 v2, 27, v4 5256; GFX11-NEXT: v_or_b32_e32 v1, v2, v1 5257; GFX11-NEXT: s_setpc_b64 s[30:31] 5258 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 5) 5259 ret i64 %result 5260} 5261 5262define i64 @v_fshr_i64_32(i64 %lhs, i64 %rhs) { 5263; GFX6-LABEL: v_fshr_i64_32: 5264; GFX6: ; %bb.0: 5265; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5266; GFX6-NEXT: v_mov_b32_e32 v1, v0 5267; GFX6-NEXT: v_mov_b32_e32 v0, v3 5268; GFX6-NEXT: s_setpc_b64 s[30:31] 5269; 5270; GFX8-LABEL: v_fshr_i64_32: 5271; GFX8: ; %bb.0: 5272; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5273; GFX8-NEXT: v_mov_b32_e32 v1, v0 5274; GFX8-NEXT: v_mov_b32_e32 v0, v3 5275; GFX8-NEXT: s_setpc_b64 s[30:31] 5276; 5277; GFX9-LABEL: v_fshr_i64_32: 5278; GFX9: ; %bb.0: 5279; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5280; GFX9-NEXT: v_mov_b32_e32 v1, v0 5281; GFX9-NEXT: v_mov_b32_e32 v0, v3 5282; GFX9-NEXT: s_setpc_b64 s[30:31] 5283; 5284; GFX10-LABEL: v_fshr_i64_32: 5285; GFX10: ; %bb.0: 5286; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5287; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 5288; GFX10-NEXT: v_mov_b32_e32 v1, v0 5289; GFX10-NEXT: v_mov_b32_e32 v0, v3 5290; GFX10-NEXT: s_setpc_b64 s[30:31] 5291; 5292; GFX11-LABEL: v_fshr_i64_32: 5293; GFX11: ; %bb.0: 5294; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5295; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 5296; GFX11-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, v3 5297; GFX11-NEXT: s_setpc_b64 s[30:31] 5298 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 32) 5299 ret i64 %result 5300} 5301 5302define i64 @v_fshr_i64_48(i64 %lhs, i64 %rhs) { 5303; GFX6-LABEL: v_fshr_i64_48: 5304; GFX6: ; %bb.0: 5305; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5306; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 16 5307; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v3 5308; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 5309; GFX6-NEXT: 
s_setpc_b64 s[30:31] 5310; 5311; GFX8-LABEL: v_fshr_i64_48: 5312; GFX8: ; %bb.0: 5313; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5314; GFX8-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1] 5315; GFX8-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 5316; GFX8-NEXT: s_setpc_b64 s[30:31] 5317; 5318; GFX9-LABEL: v_fshr_i64_48: 5319; GFX9: ; %bb.0: 5320; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5321; GFX9-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1] 5322; GFX9-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 5323; GFX9-NEXT: s_setpc_b64 s[30:31] 5324; 5325; GFX10-LABEL: v_fshr_i64_48: 5326; GFX10: ; %bb.0: 5327; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5328; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 5329; GFX10-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1] 5330; GFX10-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 5331; GFX10-NEXT: s_setpc_b64 s[30:31] 5332; 5333; GFX11-LABEL: v_fshr_i64_48: 5334; GFX11: ; %bb.0: 5335; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5336; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 5337; GFX11-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1] 5338; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v3 5339; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 5340; GFX11-NEXT: v_or_b32_e32 v0, v0, v2 5341; GFX11-NEXT: s_setpc_b64 s[30:31] 5342 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 48) 5343 ret i64 %result 5344} 5345 5346define amdgpu_ps <2 x float> @v_fshr_i64_ssv(i64 inreg %lhs, i64 inreg %rhs, i64 %amt) { 5347; GFX6-LABEL: v_fshr_i64_ssv: 5348; GFX6: ; %bb.0: 5349; GFX6-NEXT: v_and_b32_e32 v2, 63, v0 5350; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0 5351; GFX6-NEXT: v_and_b32_e32 v0, 63, v0 5352; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5353; GFX6-NEXT: v_lshl_b64 v[0:1], s[0:1], v0 5354; GFX6-NEXT: v_lshr_b64 v[2:3], s[2:3], v2 5355; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 5356; GFX6-NEXT: v_or_b32_e32 v1, v1, 
v3 5357; GFX6-NEXT: ; return to shader part epilog 5358; 5359; GFX8-LABEL: v_fshr_i64_ssv: 5360; GFX8: ; %bb.0: 5361; GFX8-NEXT: v_and_b32_e32 v2, 63, v0 5362; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0 5363; GFX8-NEXT: v_and_b32_e32 v0, 63, v0 5364; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5365; GFX8-NEXT: v_lshlrev_b64 v[0:1], v0, s[0:1] 5366; GFX8-NEXT: v_lshrrev_b64 v[2:3], v2, s[2:3] 5367; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 5368; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 5369; GFX8-NEXT: ; return to shader part epilog 5370; 5371; GFX9-LABEL: v_fshr_i64_ssv: 5372; GFX9: ; %bb.0: 5373; GFX9-NEXT: v_and_b32_e32 v2, 63, v0 5374; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0 5375; GFX9-NEXT: v_and_b32_e32 v0, 63, v0 5376; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5377; GFX9-NEXT: v_lshlrev_b64 v[0:1], v0, s[0:1] 5378; GFX9-NEXT: v_lshrrev_b64 v[2:3], v2, s[2:3] 5379; GFX9-NEXT: v_or_b32_e32 v0, v0, v2 5380; GFX9-NEXT: v_or_b32_e32 v1, v1, v3 5381; GFX9-NEXT: ; return to shader part epilog 5382; 5383; GFX10-LABEL: v_fshr_i64_ssv: 5384; GFX10: ; %bb.0: 5385; GFX10-NEXT: v_xor_b32_e32 v1, -1, v0 5386; GFX10-NEXT: v_and_b32_e32 v0, 63, v0 5387; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5388; GFX10-NEXT: v_and_b32_e32 v2, 63, v1 5389; GFX10-NEXT: v_lshrrev_b64 v[0:1], v0, s[2:3] 5390; GFX10-NEXT: v_lshlrev_b64 v[2:3], v2, s[0:1] 5391; GFX10-NEXT: v_or_b32_e32 v0, v2, v0 5392; GFX10-NEXT: v_or_b32_e32 v1, v3, v1 5393; GFX10-NEXT: ; return to shader part epilog 5394; 5395; GFX11-LABEL: v_fshr_i64_ssv: 5396; GFX11: ; %bb.0: 5397; GFX11-NEXT: v_xor_b32_e32 v1, -1, v0 5398; GFX11-NEXT: v_and_b32_e32 v0, 63, v0 5399; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5400; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 5401; GFX11-NEXT: v_and_b32_e32 v2, 63, v1 5402; GFX11-NEXT: v_lshrrev_b64 v[0:1], v0, s[2:3] 5403; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 5404; GFX11-NEXT: v_lshlrev_b64 v[2:3], v2, s[0:1] 5405; GFX11-NEXT: 
v_or_b32_e32 v0, v2, v0 5406; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 5407; GFX11-NEXT: v_or_b32_e32 v1, v3, v1 5408; GFX11-NEXT: ; return to shader part epilog 5409 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt) 5410 %cast = bitcast i64 %result to <2 x float> 5411 ret <2 x float> %cast 5412} 5413 5414define amdgpu_ps <2 x float> @v_fshr_i64_svs(i64 inreg %lhs, i64 %rhs, i64 inreg %amt) { 5415; GFX6-LABEL: v_fshr_i64_svs: 5416; GFX6: ; %bb.0: 5417; GFX6-NEXT: s_and_b64 s[4:5], s[2:3], 63 5418; GFX6-NEXT: s_andn2_b64 s[2:3], 63, s[2:3] 5419; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5420; GFX6-NEXT: v_lshr_b64 v[0:1], v[0:1], s4 5421; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 5422; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 5423; GFX6-NEXT: v_or_b32_e32 v1, s1, v1 5424; GFX6-NEXT: ; return to shader part epilog 5425; 5426; GFX8-LABEL: v_fshr_i64_svs: 5427; GFX8: ; %bb.0: 5428; GFX8-NEXT: s_and_b64 s[4:5], s[2:3], 63 5429; GFX8-NEXT: s_andn2_b64 s[2:3], 63, s[2:3] 5430; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5431; GFX8-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1] 5432; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 5433; GFX8-NEXT: v_or_b32_e32 v0, s0, v0 5434; GFX8-NEXT: v_or_b32_e32 v1, s1, v1 5435; GFX8-NEXT: ; return to shader part epilog 5436; 5437; GFX9-LABEL: v_fshr_i64_svs: 5438; GFX9: ; %bb.0: 5439; GFX9-NEXT: s_and_b64 s[4:5], s[2:3], 63 5440; GFX9-NEXT: s_andn2_b64 s[2:3], 63, s[2:3] 5441; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5442; GFX9-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1] 5443; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 5444; GFX9-NEXT: v_or_b32_e32 v0, s0, v0 5445; GFX9-NEXT: v_or_b32_e32 v1, s1, v1 5446; GFX9-NEXT: ; return to shader part epilog 5447; 5448; GFX10-LABEL: v_fshr_i64_svs: 5449; GFX10: ; %bb.0: 5450; GFX10-NEXT: s_and_b64 s[4:5], s[2:3], 63 5451; GFX10-NEXT: s_andn2_b64 s[2:3], 63, s[2:3] 5452; GFX10-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1] 5453; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5454; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 5455; 
GFX10-NEXT: v_or_b32_e32 v0, s0, v0 5456; GFX10-NEXT: v_or_b32_e32 v1, s1, v1 5457; GFX10-NEXT: ; return to shader part epilog 5458; 5459; GFX11-LABEL: v_fshr_i64_svs: 5460; GFX11: ; %bb.0: 5461; GFX11-NEXT: s_and_b64 s[4:5], s[2:3], 63 5462; GFX11-NEXT: s_and_not1_b64 s[2:3], 63, s[2:3] 5463; GFX11-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1] 5464; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5465; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 5466; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 5467; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 5468; GFX11-NEXT: v_or_b32_e32 v0, s0, v0 5469; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 5470; GFX11-NEXT: v_or_b32_e32 v1, s1, v1 5471; GFX11-NEXT: ; return to shader part epilog 5472 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt) 5473 %cast = bitcast i64 %result to <2 x float> 5474 ret <2 x float> %cast 5475} 5476 5477define amdgpu_ps <2 x float> @v_fshr_i64_vss(i64 %lhs, i64 inreg %rhs, i64 inreg %amt) { 5478; GFX6-LABEL: v_fshr_i64_vss: 5479; GFX6: ; %bb.0: 5480; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 1 5481; GFX6-NEXT: s_and_b64 s[4:5], s[2:3], 63 5482; GFX6-NEXT: s_andn2_b64 s[2:3], 63, s[2:3] 5483; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], s2 5484; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], s4 5485; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 5486; GFX6-NEXT: v_or_b32_e32 v1, s1, v1 5487; GFX6-NEXT: ; return to shader part epilog 5488; 5489; GFX8-LABEL: v_fshr_i64_vss: 5490; GFX8: ; %bb.0: 5491; GFX8-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 5492; GFX8-NEXT: s_and_b64 s[4:5], s[2:3], 63 5493; GFX8-NEXT: s_andn2_b64 s[2:3], 63, s[2:3] 5494; GFX8-NEXT: v_lshlrev_b64 v[0:1], s2, v[0:1] 5495; GFX8-NEXT: s_lshr_b64 s[0:1], s[0:1], s4 5496; GFX8-NEXT: v_or_b32_e32 v0, s0, v0 5497; GFX8-NEXT: v_or_b32_e32 v1, s1, v1 5498; GFX8-NEXT: ; return to shader part epilog 5499; 5500; GFX9-LABEL: v_fshr_i64_vss: 5501; GFX9: ; %bb.0: 5502; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 5503; GFX9-NEXT: s_and_b64 s[4:5], s[2:3], 63 
5504; GFX9-NEXT: s_andn2_b64 s[2:3], 63, s[2:3] 5505; GFX9-NEXT: v_lshlrev_b64 v[0:1], s2, v[0:1] 5506; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], s4 5507; GFX9-NEXT: v_or_b32_e32 v0, s0, v0 5508; GFX9-NEXT: v_or_b32_e32 v1, s1, v1 5509; GFX9-NEXT: ; return to shader part epilog 5510; 5511; GFX10-LABEL: v_fshr_i64_vss: 5512; GFX10: ; %bb.0: 5513; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 5514; GFX10-NEXT: s_andn2_b64 s[4:5], 63, s[2:3] 5515; GFX10-NEXT: s_and_b64 s[2:3], s[2:3], 63 5516; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 5517; GFX10-NEXT: v_lshlrev_b64 v[0:1], s4, v[0:1] 5518; GFX10-NEXT: v_or_b32_e32 v0, s0, v0 5519; GFX10-NEXT: v_or_b32_e32 v1, s1, v1 5520; GFX10-NEXT: ; return to shader part epilog 5521; 5522; GFX11-LABEL: v_fshr_i64_vss: 5523; GFX11: ; %bb.0: 5524; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 5525; GFX11-NEXT: s_and_not1_b64 s[4:5], 63, s[2:3] 5526; GFX11-NEXT: s_and_b64 s[2:3], s[2:3], 63 5527; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 5528; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 5529; GFX11-NEXT: v_lshlrev_b64 v[0:1], s4, v[0:1] 5530; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 5531; GFX11-NEXT: v_or_b32_e32 v0, s0, v0 5532; GFX11-NEXT: v_or_b32_e32 v1, s1, v1 5533; GFX11-NEXT: ; return to shader part epilog 5534 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt) 5535 %cast = bitcast i64 %result to <2 x float> 5536 ret <2 x float> %cast 5537} 5538 5539define amdgpu_ps <2 x i64> @s_fshr_v2i64(<2 x i64> inreg %lhs, <2 x i64> inreg %rhs, <2 x i64> inreg %amt) { 5540; GFX6-LABEL: s_fshr_v2i64: 5541; GFX6: ; %bb.0: 5542; GFX6-NEXT: s_and_b64 s[12:13], s[8:9], 63 5543; GFX6-NEXT: s_andn2_b64 s[8:9], 63, s[8:9] 5544; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5545; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s8 5546; GFX6-NEXT: s_lshr_b64 s[4:5], s[4:5], s12 5547; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] 5548; GFX6-NEXT: s_and_b64 s[4:5], s[10:11], 63 
5549; GFX6-NEXT: s_andn2_b64 s[8:9], 63, s[10:11] 5550; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 5551; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], s8 5552; GFX6-NEXT: s_lshr_b64 s[4:5], s[6:7], s4 5553; GFX6-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] 5554; GFX6-NEXT: ; return to shader part epilog 5555; 5556; GFX8-LABEL: s_fshr_v2i64: 5557; GFX8: ; %bb.0: 5558; GFX8-NEXT: s_and_b64 s[12:13], s[8:9], 63 5559; GFX8-NEXT: s_andn2_b64 s[8:9], 63, s[8:9] 5560; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5561; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s8 5562; GFX8-NEXT: s_lshr_b64 s[4:5], s[4:5], s12 5563; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] 5564; GFX8-NEXT: s_and_b64 s[4:5], s[10:11], 63 5565; GFX8-NEXT: s_andn2_b64 s[8:9], 63, s[10:11] 5566; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 5567; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], s8 5568; GFX8-NEXT: s_lshr_b64 s[4:5], s[6:7], s4 5569; GFX8-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] 5570; GFX8-NEXT: ; return to shader part epilog 5571; 5572; GFX9-LABEL: s_fshr_v2i64: 5573; GFX9: ; %bb.0: 5574; GFX9-NEXT: s_and_b64 s[12:13], s[8:9], 63 5575; GFX9-NEXT: s_andn2_b64 s[8:9], 63, s[8:9] 5576; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5577; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s8 5578; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], s12 5579; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] 5580; GFX9-NEXT: s_and_b64 s[4:5], s[10:11], 63 5581; GFX9-NEXT: s_andn2_b64 s[8:9], 63, s[10:11] 5582; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 5583; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], s8 5584; GFX9-NEXT: s_lshr_b64 s[4:5], s[6:7], s4 5585; GFX9-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] 5586; GFX9-NEXT: ; return to shader part epilog 5587; 5588; GFX10-LABEL: s_fshr_v2i64: 5589; GFX10: ; %bb.0: 5590; GFX10-NEXT: s_andn2_b64 s[12:13], 63, s[8:9] 5591; GFX10-NEXT: s_and_b64 s[8:9], s[8:9], 63 5592; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5593; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s8 5594; GFX10-NEXT: s_andn2_b64 s[8:9], 63, s[10:11] 5595; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 
5596; GFX10-NEXT: s_and_b64 s[10:11], s[10:11], 63 5597; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s12 5598; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], s8 5599; GFX10-NEXT: s_lshr_b64 s[6:7], s[6:7], s10 5600; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] 5601; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7] 5602; GFX10-NEXT: ; return to shader part epilog 5603; 5604; GFX11-LABEL: s_fshr_v2i64: 5605; GFX11: ; %bb.0: 5606; GFX11-NEXT: s_and_not1_b64 s[12:13], 63, s[8:9] 5607; GFX11-NEXT: s_and_b64 s[8:9], s[8:9], 63 5608; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5609; GFX11-NEXT: s_lshr_b64 s[4:5], s[4:5], s8 5610; GFX11-NEXT: s_and_not1_b64 s[8:9], 63, s[10:11] 5611; GFX11-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 5612; GFX11-NEXT: s_and_b64 s[10:11], s[10:11], 63 5613; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s12 5614; GFX11-NEXT: s_lshl_b64 s[2:3], s[2:3], s8 5615; GFX11-NEXT: s_lshr_b64 s[6:7], s[6:7], s10 5616; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] 5617; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7] 5618; GFX11-NEXT: ; return to shader part epilog 5619 %result = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt) 5620 ret <2 x i64> %result 5621} 5622 5623define <2 x i64> @v_fshr_v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt) { 5624; GFX6-LABEL: v_fshr_v2i64: 5625; GFX6: ; %bb.0: 5626; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5627; GFX6-NEXT: v_and_b32_e32 v9, 63, v8 5628; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8 5629; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 1 5630; GFX6-NEXT: v_and_b32_e32 v8, 63, v8 5631; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v8 5632; GFX6-NEXT: v_lshr_b64 v[4:5], v[4:5], v9 5633; GFX6-NEXT: v_xor_b32_e32 v8, -1, v10 5634; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 5635; GFX6-NEXT: v_or_b32_e32 v0, v0, v4 5636; GFX6-NEXT: v_and_b32_e32 v4, 63, v10 5637; GFX6-NEXT: v_and_b32_e32 v8, 63, v8 5638; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], v8 5639; GFX6-NEXT: v_lshr_b64 v[6:7], v[6:7], v4 5640; GFX6-NEXT: v_or_b32_e32 v1, v1, v5 5641; 
GFX6-NEXT: v_or_b32_e32 v2, v2, v6 5642; GFX6-NEXT: v_or_b32_e32 v3, v3, v7 5643; GFX6-NEXT: s_setpc_b64 s[30:31] 5644; 5645; GFX8-LABEL: v_fshr_v2i64: 5646; GFX8: ; %bb.0: 5647; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5648; GFX8-NEXT: v_and_b32_e32 v9, 63, v8 5649; GFX8-NEXT: v_xor_b32_e32 v8, -1, v8 5650; GFX8-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 5651; GFX8-NEXT: v_and_b32_e32 v8, 63, v8 5652; GFX8-NEXT: v_lshlrev_b64 v[0:1], v8, v[0:1] 5653; GFX8-NEXT: v_lshrrev_b64 v[4:5], v9, v[4:5] 5654; GFX8-NEXT: v_xor_b32_e32 v8, -1, v10 5655; GFX8-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 5656; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 5657; GFX8-NEXT: v_and_b32_e32 v4, 63, v10 5658; GFX8-NEXT: v_and_b32_e32 v8, 63, v8 5659; GFX8-NEXT: v_lshlrev_b64 v[2:3], v8, v[2:3] 5660; GFX8-NEXT: v_lshrrev_b64 v[6:7], v4, v[6:7] 5661; GFX8-NEXT: v_or_b32_e32 v1, v1, v5 5662; GFX8-NEXT: v_or_b32_e32 v2, v2, v6 5663; GFX8-NEXT: v_or_b32_e32 v3, v3, v7 5664; GFX8-NEXT: s_setpc_b64 s[30:31] 5665; 5666; GFX9-LABEL: v_fshr_v2i64: 5667; GFX9: ; %bb.0: 5668; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5669; GFX9-NEXT: v_and_b32_e32 v9, 63, v8 5670; GFX9-NEXT: v_xor_b32_e32 v8, -1, v8 5671; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 5672; GFX9-NEXT: v_and_b32_e32 v8, 63, v8 5673; GFX9-NEXT: v_lshlrev_b64 v[0:1], v8, v[0:1] 5674; GFX9-NEXT: v_lshrrev_b64 v[4:5], v9, v[4:5] 5675; GFX9-NEXT: v_xor_b32_e32 v8, -1, v10 5676; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 5677; GFX9-NEXT: v_or_b32_e32 v0, v0, v4 5678; GFX9-NEXT: v_and_b32_e32 v4, 63, v10 5679; GFX9-NEXT: v_and_b32_e32 v8, 63, v8 5680; GFX9-NEXT: v_lshlrev_b64 v[2:3], v8, v[2:3] 5681; GFX9-NEXT: v_lshrrev_b64 v[6:7], v4, v[6:7] 5682; GFX9-NEXT: v_or_b32_e32 v1, v1, v5 5683; GFX9-NEXT: v_or_b32_e32 v2, v2, v6 5684; GFX9-NEXT: v_or_b32_e32 v3, v3, v7 5685; GFX9-NEXT: s_setpc_b64 s[30:31] 5686; 5687; GFX10-LABEL: v_fshr_v2i64: 5688; GFX10: ; %bb.0: 5689; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5690; GFX10-NEXT: 
s_waitcnt_vscnt null, 0x0 5691; GFX10-NEXT: v_xor_b32_e32 v9, -1, v8 5692; GFX10-NEXT: v_xor_b32_e32 v11, -1, v10 5693; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 5694; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 5695; GFX10-NEXT: v_and_b32_e32 v8, 63, v8 5696; GFX10-NEXT: v_and_b32_e32 v9, 63, v9 5697; GFX10-NEXT: v_and_b32_e32 v11, 63, v11 5698; GFX10-NEXT: v_and_b32_e32 v10, 63, v10 5699; GFX10-NEXT: v_lshrrev_b64 v[4:5], v8, v[4:5] 5700; GFX10-NEXT: v_lshlrev_b64 v[0:1], v9, v[0:1] 5701; GFX10-NEXT: v_lshlrev_b64 v[2:3], v11, v[2:3] 5702; GFX10-NEXT: v_lshrrev_b64 v[6:7], v10, v[6:7] 5703; GFX10-NEXT: v_or_b32_e32 v0, v0, v4 5704; GFX10-NEXT: v_or_b32_e32 v1, v1, v5 5705; GFX10-NEXT: v_or_b32_e32 v2, v2, v6 5706; GFX10-NEXT: v_or_b32_e32 v3, v3, v7 5707; GFX10-NEXT: s_setpc_b64 s[30:31] 5708; 5709; GFX11-LABEL: v_fshr_v2i64: 5710; GFX11: ; %bb.0: 5711; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5712; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 5713; GFX11-NEXT: v_xor_b32_e32 v9, -1, v8 5714; GFX11-NEXT: v_xor_b32_e32 v11, -1, v10 5715; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 5716; GFX11-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 5717; GFX11-NEXT: v_and_b32_e32 v8, 63, v8 5718; GFX11-NEXT: v_and_b32_e32 v9, 63, v9 5719; GFX11-NEXT: v_and_b32_e32 v11, 63, v11 5720; GFX11-NEXT: v_and_b32_e32 v10, 63, v10 5721; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 5722; GFX11-NEXT: v_lshrrev_b64 v[4:5], v8, v[4:5] 5723; GFX11-NEXT: v_lshlrev_b64 v[0:1], v9, v[0:1] 5724; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 5725; GFX11-NEXT: v_lshlrev_b64 v[2:3], v11, v[2:3] 5726; GFX11-NEXT: v_lshrrev_b64 v[6:7], v10, v[6:7] 5727; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4) 5728; GFX11-NEXT: v_or_b32_e32 v0, v0, v4 5729; GFX11-NEXT: v_or_b32_e32 v1, v1, v5 5730; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4) 5731; GFX11-NEXT: 
v_or_b32_e32 v2, v2, v6 5732; GFX11-NEXT: v_or_b32_e32 v3, v3, v7 5733; GFX11-NEXT: s_setpc_b64 s[30:31] 5734 %result = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt) 5735 ret <2 x i64> %result 5736} 5737 ; s_fshr_i128: amdgpu_ps function calling @llvm.fshr.i128 with all three i128 operands marked inreg; the expected code is built from s_* scalar instructions (shifts, compares and s_cselect). The CHECK lines are autogenerated, so regenerate them with utils/update_llc_test_checks.py rather than editing them by hand. 5738define amdgpu_ps i128 @s_fshr_i128(i128 inreg %lhs, i128 inreg %rhs, i128 inreg %amt) { 5739; GFX6-LABEL: s_fshr_i128: 5740; GFX6: ; %bb.0: 5741; GFX6-NEXT: s_movk_i32 s10, 0x7f 5742; GFX6-NEXT: s_mov_b32 s11, 0 5743; GFX6-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11] 5744; GFX6-NEXT: s_andn2_b64 s[8:9], s[10:11], s[8:9] 5745; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 5746; GFX6-NEXT: s_lshr_b32 s10, s1, 31 5747; GFX6-NEXT: s_lshl_b64 s[14:15], s[0:1], 1 5748; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[10:11] 5749; GFX6-NEXT: s_sub_i32 s13, s8, 64 5750; GFX6-NEXT: s_sub_i32 s9, 64, s8 5751; GFX6-NEXT: s_cmp_lt_u32 s8, 64 5752; GFX6-NEXT: s_cselect_b32 s16, 1, 0 5753; GFX6-NEXT: s_cmp_eq_u32 s8, 0 5754; GFX6-NEXT: s_cselect_b32 s17, 1, 0 5755; GFX6-NEXT: s_lshl_b64 s[2:3], s[14:15], s8 5756; GFX6-NEXT: s_lshr_b64 s[10:11], s[14:15], s9 5757; GFX6-NEXT: s_lshl_b64 s[8:9], s[0:1], s8 5758; GFX6-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9] 5759; GFX6-NEXT: s_lshl_b64 s[10:11], s[14:15], s13 5760; GFX6-NEXT: s_cmp_lg_u32 s16, 0 5761; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 5762; GFX6-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11] 5763; GFX6-NEXT: s_cmp_lg_u32 s17, 0 5764; GFX6-NEXT: s_cselect_b64 s[8:9], s[0:1], s[8:9] 5765; GFX6-NEXT: s_sub_i32 s14, s12, 64 5766; GFX6-NEXT: s_sub_i32 s13, 64, s12 5767; GFX6-NEXT: s_cmp_lt_u32 s12, 64 5768; GFX6-NEXT: s_cselect_b32 s15, 1, 0 5769; GFX6-NEXT: s_cmp_eq_u32 s12, 0 5770; GFX6-NEXT: s_cselect_b32 s16, 1, 0 5771; GFX6-NEXT: s_lshr_b64 s[0:1], s[6:7], s12 5772; GFX6-NEXT: s_lshr_b64 s[10:11], s[4:5], s12 5773; GFX6-NEXT: s_lshl_b64 s[12:13], s[6:7], s13 5774; GFX6-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] 5775; GFX6-NEXT: s_lshr_b64 s[6:7], s[6:7], s14 5776; GFX6-NEXT: s_cmp_lg_u32 s15, 0 5777; 
GFX6-NEXT: s_cselect_b64 s[6:7], s[10:11], s[6:7] 5778; GFX6-NEXT: s_cmp_lg_u32 s16, 0 5779; GFX6-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] 5780; GFX6-NEXT: s_cmp_lg_u32 s15, 0 5781; GFX6-NEXT: s_cselect_b64 s[6:7], s[0:1], 0 5782; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5] 5783; GFX6-NEXT: s_or_b64 s[2:3], s[8:9], s[6:7] 5784; GFX6-NEXT: ; return to shader part epilog 5785; 5786; GFX8-LABEL: s_fshr_i128: 5787; GFX8: ; %bb.0: 5788; GFX8-NEXT: s_movk_i32 s10, 0x7f 5789; GFX8-NEXT: s_mov_b32 s11, 0 5790; GFX8-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11] 5791; GFX8-NEXT: s_andn2_b64 s[8:9], s[10:11], s[8:9] 5792; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 5793; GFX8-NEXT: s_lshr_b32 s10, s1, 31 5794; GFX8-NEXT: s_lshl_b64 s[14:15], s[0:1], 1 5795; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[10:11] 5796; GFX8-NEXT: s_sub_i32 s13, s8, 64 5797; GFX8-NEXT: s_sub_i32 s9, 64, s8 5798; GFX8-NEXT: s_cmp_lt_u32 s8, 64 5799; GFX8-NEXT: s_cselect_b32 s16, 1, 0 5800; GFX8-NEXT: s_cmp_eq_u32 s8, 0 5801; GFX8-NEXT: s_cselect_b32 s17, 1, 0 5802; GFX8-NEXT: s_lshl_b64 s[2:3], s[14:15], s8 5803; GFX8-NEXT: s_lshr_b64 s[10:11], s[14:15], s9 5804; GFX8-NEXT: s_lshl_b64 s[8:9], s[0:1], s8 5805; GFX8-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9] 5806; GFX8-NEXT: s_lshl_b64 s[10:11], s[14:15], s13 5807; GFX8-NEXT: s_cmp_lg_u32 s16, 0 5808; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 5809; GFX8-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11] 5810; GFX8-NEXT: s_cmp_lg_u32 s17, 0 5811; GFX8-NEXT: s_cselect_b64 s[8:9], s[0:1], s[8:9] 5812; GFX8-NEXT: s_sub_i32 s14, s12, 64 5813; GFX8-NEXT: s_sub_i32 s13, 64, s12 5814; GFX8-NEXT: s_cmp_lt_u32 s12, 64 5815; GFX8-NEXT: s_cselect_b32 s15, 1, 0 5816; GFX8-NEXT: s_cmp_eq_u32 s12, 0 5817; GFX8-NEXT: s_cselect_b32 s16, 1, 0 5818; GFX8-NEXT: s_lshr_b64 s[0:1], s[6:7], s12 5819; GFX8-NEXT: s_lshr_b64 s[10:11], s[4:5], s12 5820; GFX8-NEXT: s_lshl_b64 s[12:13], s[6:7], s13 5821; GFX8-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] 5822; GFX8-NEXT: s_lshr_b64 s[6:7], s[6:7], s14 
5823; GFX8-NEXT: s_cmp_lg_u32 s15, 0 5824; GFX8-NEXT: s_cselect_b64 s[6:7], s[10:11], s[6:7] 5825; GFX8-NEXT: s_cmp_lg_u32 s16, 0 5826; GFX8-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] 5827; GFX8-NEXT: s_cmp_lg_u32 s15, 0 5828; GFX8-NEXT: s_cselect_b64 s[6:7], s[0:1], 0 5829; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5] 5830; GFX8-NEXT: s_or_b64 s[2:3], s[8:9], s[6:7] 5831; GFX8-NEXT: ; return to shader part epilog 5832; 5833; GFX9-LABEL: s_fshr_i128: 5834; GFX9: ; %bb.0: 5835; GFX9-NEXT: s_movk_i32 s10, 0x7f 5836; GFX9-NEXT: s_mov_b32 s11, 0 5837; GFX9-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11] 5838; GFX9-NEXT: s_andn2_b64 s[8:9], s[10:11], s[8:9] 5839; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 5840; GFX9-NEXT: s_lshr_b32 s10, s1, 31 5841; GFX9-NEXT: s_lshl_b64 s[14:15], s[0:1], 1 5842; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[10:11] 5843; GFX9-NEXT: s_sub_i32 s13, s8, 64 5844; GFX9-NEXT: s_sub_i32 s9, 64, s8 5845; GFX9-NEXT: s_cmp_lt_u32 s8, 64 5846; GFX9-NEXT: s_cselect_b32 s16, 1, 0 5847; GFX9-NEXT: s_cmp_eq_u32 s8, 0 5848; GFX9-NEXT: s_cselect_b32 s17, 1, 0 5849; GFX9-NEXT: s_lshl_b64 s[2:3], s[14:15], s8 5850; GFX9-NEXT: s_lshr_b64 s[10:11], s[14:15], s9 5851; GFX9-NEXT: s_lshl_b64 s[8:9], s[0:1], s8 5852; GFX9-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9] 5853; GFX9-NEXT: s_lshl_b64 s[10:11], s[14:15], s13 5854; GFX9-NEXT: s_cmp_lg_u32 s16, 0 5855; GFX9-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 5856; GFX9-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11] 5857; GFX9-NEXT: s_cmp_lg_u32 s17, 0 5858; GFX9-NEXT: s_cselect_b64 s[8:9], s[0:1], s[8:9] 5859; GFX9-NEXT: s_sub_i32 s14, s12, 64 5860; GFX9-NEXT: s_sub_i32 s13, 64, s12 5861; GFX9-NEXT: s_cmp_lt_u32 s12, 64 5862; GFX9-NEXT: s_cselect_b32 s15, 1, 0 5863; GFX9-NEXT: s_cmp_eq_u32 s12, 0 5864; GFX9-NEXT: s_cselect_b32 s16, 1, 0 5865; GFX9-NEXT: s_lshr_b64 s[0:1], s[6:7], s12 5866; GFX9-NEXT: s_lshr_b64 s[10:11], s[4:5], s12 5867; GFX9-NEXT: s_lshl_b64 s[12:13], s[6:7], s13 5868; GFX9-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] 5869; 
GFX9-NEXT: s_lshr_b64 s[6:7], s[6:7], s14 5870; GFX9-NEXT: s_cmp_lg_u32 s15, 0 5871; GFX9-NEXT: s_cselect_b64 s[6:7], s[10:11], s[6:7] 5872; GFX9-NEXT: s_cmp_lg_u32 s16, 0 5873; GFX9-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] 5874; GFX9-NEXT: s_cmp_lg_u32 s15, 0 5875; GFX9-NEXT: s_cselect_b64 s[6:7], s[0:1], 0 5876; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5] 5877; GFX9-NEXT: s_or_b64 s[2:3], s[8:9], s[6:7] 5878; GFX9-NEXT: ; return to shader part epilog 5879; 5880; GFX10-LABEL: s_fshr_i128: 5881; GFX10: ; %bb.0: 5882; GFX10-NEXT: s_movk_i32 s10, 0x7f 5883; GFX10-NEXT: s_mov_b32 s11, 0 5884; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 5885; GFX10-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11] 5886; GFX10-NEXT: s_andn2_b64 s[8:9], s[10:11], s[8:9] 5887; GFX10-NEXT: s_lshr_b32 s10, s1, 31 5888; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5889; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[10:11] 5890; GFX10-NEXT: s_sub_i32 s13, s8, 64 5891; GFX10-NEXT: s_sub_i32 s9, 64, s8 5892; GFX10-NEXT: s_cmp_lt_u32 s8, 64 5893; GFX10-NEXT: s_cselect_b32 s16, 1, 0 5894; GFX10-NEXT: s_cmp_eq_u32 s8, 0 5895; GFX10-NEXT: s_cselect_b32 s17, 1, 0 5896; GFX10-NEXT: s_lshr_b64 s[10:11], s[0:1], s9 5897; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], s8 5898; GFX10-NEXT: s_lshl_b64 s[8:9], s[0:1], s8 5899; GFX10-NEXT: s_or_b64 s[10:11], s[10:11], s[14:15] 5900; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s13 5901; GFX10-NEXT: s_cmp_lg_u32 s16, 0 5902; GFX10-NEXT: s_cselect_b64 s[8:9], s[8:9], 0 5903; GFX10-NEXT: s_cselect_b64 s[0:1], s[10:11], s[0:1] 5904; GFX10-NEXT: s_cmp_lg_u32 s17, 0 5905; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] 5906; GFX10-NEXT: s_sub_i32 s14, s12, 64 5907; GFX10-NEXT: s_sub_i32 s10, 64, s12 5908; GFX10-NEXT: s_cmp_lt_u32 s12, 64 5909; GFX10-NEXT: s_cselect_b32 s15, 1, 0 5910; GFX10-NEXT: s_cmp_eq_u32 s12, 0 5911; GFX10-NEXT: s_cselect_b32 s16, 1, 0 5912; GFX10-NEXT: s_lshr_b64 s[0:1], s[4:5], s12 5913; GFX10-NEXT: s_lshl_b64 s[10:11], s[6:7], s10 5914; GFX10-NEXT: s_lshr_b64 s[12:13], 
s[6:7], s12 5915; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[10:11] 5916; GFX10-NEXT: s_lshr_b64 s[6:7], s[6:7], s14 5917; GFX10-NEXT: s_cmp_lg_u32 s15, 0 5918; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[6:7] 5919; GFX10-NEXT: s_cmp_lg_u32 s16, 0 5920; GFX10-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] 5921; GFX10-NEXT: s_cmp_lg_u32 s15, 0 5922; GFX10-NEXT: s_cselect_b64 s[4:5], s[12:13], 0 5923; GFX10-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1] 5924; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] 5925; GFX10-NEXT: ; return to shader part epilog 5926; 5927; GFX11-LABEL: s_fshr_i128: 5928; GFX11: ; %bb.0: 5929; GFX11-NEXT: s_movk_i32 s10, 0x7f 5930; GFX11-NEXT: s_mov_b32 s11, 0 5931; GFX11-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 5932; GFX11-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11] 5933; GFX11-NEXT: s_and_not1_b64 s[8:9], s[10:11], s[8:9] 5934; GFX11-NEXT: s_lshr_b32 s10, s1, 31 5935; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5936; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[10:11] 5937; GFX11-NEXT: s_sub_i32 s13, s8, 64 5938; GFX11-NEXT: s_sub_i32 s9, 64, s8 5939; GFX11-NEXT: s_cmp_lt_u32 s8, 64 5940; GFX11-NEXT: s_cselect_b32 s16, 1, 0 5941; GFX11-NEXT: s_cmp_eq_u32 s8, 0 5942; GFX11-NEXT: s_cselect_b32 s17, 1, 0 5943; GFX11-NEXT: s_lshr_b64 s[10:11], s[0:1], s9 5944; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], s8 5945; GFX11-NEXT: s_lshl_b64 s[8:9], s[0:1], s8 5946; GFX11-NEXT: s_or_b64 s[10:11], s[10:11], s[14:15] 5947; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s13 5948; GFX11-NEXT: s_cmp_lg_u32 s16, 0 5949; GFX11-NEXT: s_cselect_b64 s[8:9], s[8:9], 0 5950; GFX11-NEXT: s_cselect_b64 s[0:1], s[10:11], s[0:1] 5951; GFX11-NEXT: s_cmp_lg_u32 s17, 0 5952; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] 5953; GFX11-NEXT: s_sub_i32 s14, s12, 64 5954; GFX11-NEXT: s_sub_i32 s10, 64, s12 5955; GFX11-NEXT: s_cmp_lt_u32 s12, 64 5956; GFX11-NEXT: s_cselect_b32 s15, 1, 0 5957; GFX11-NEXT: s_cmp_eq_u32 s12, 0 5958; GFX11-NEXT: s_cselect_b32 s16, 1, 0 5959; GFX11-NEXT: s_lshr_b64 s[0:1], s[4:5], s12 5960; 
GFX11-NEXT: s_lshl_b64 s[10:11], s[6:7], s10 5961; GFX11-NEXT: s_lshr_b64 s[12:13], s[6:7], s12 5962; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[10:11] 5963; GFX11-NEXT: s_lshr_b64 s[6:7], s[6:7], s14 5964; GFX11-NEXT: s_cmp_lg_u32 s15, 0 5965; GFX11-NEXT: s_cselect_b64 s[0:1], s[0:1], s[6:7] 5966; GFX11-NEXT: s_cmp_lg_u32 s16, 0 5967; GFX11-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] 5968; GFX11-NEXT: s_cmp_lg_u32 s15, 0 5969; GFX11-NEXT: s_cselect_b64 s[4:5], s[12:13], 0 5970; GFX11-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1] 5971; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] 5972; GFX11-NEXT: ; return to shader part epilog 5973 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt) 5974 ret i128 %result 5975} 5976 ; v_fshr_i128: calls @llvm.fshr.i128 on i128 operands passed as ordinary (non-inreg) arguments with no calling-convention keyword; the expected code uses v_* shifts plus v_cmp/v_cndmask selects and returns through s_setpc_b64. The CHECK lines are autogenerated, so regenerate them with utils/update_llc_test_checks.py rather than editing them by hand. 5977define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) { 5978; GFX6-LABEL: v_fshr_i128: 5979; GFX6: ; %bb.0: 5980; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5981; GFX6-NEXT: v_and_b32_e32 v14, 0x7f, v8 5982; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8 5983; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 5984; GFX6-NEXT: v_and_b32_e32 v15, 0x7f, v8 5985; GFX6-NEXT: v_lshl_b64 v[8:9], v[0:1], 1 5986; GFX6-NEXT: v_lshrrev_b32_e32 v0, 31, v1 5987; GFX6-NEXT: v_or_b32_e32 v2, v2, v0 5988; GFX6-NEXT: v_sub_i32_e32 v0, vcc, 64, v15 5989; GFX6-NEXT: v_lshr_b64 v[0:1], v[8:9], v0 5990; GFX6-NEXT: v_lshl_b64 v[10:11], v[2:3], v15 5991; GFX6-NEXT: v_subrev_i32_e32 v16, vcc, 64, v15 5992; GFX6-NEXT: v_lshl_b64 v[12:13], v[8:9], v15 5993; GFX6-NEXT: v_or_b32_e32 v10, v0, v10 5994; GFX6-NEXT: v_or_b32_e32 v11, v1, v11 5995; GFX6-NEXT: v_lshl_b64 v[0:1], v[8:9], v16 5996; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15 5997; GFX6-NEXT: v_cndmask_b32_e32 v12, 0, v12, vcc 5998; GFX6-NEXT: v_cndmask_b32_e32 v13, 0, v13, vcc 5999; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 6000; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 6001; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v15 6002; GFX6-NEXT: v_cndmask_b32_e32 v10, v0, v2, vcc 6003; GFX6-NEXT: v_cndmask_b32_e32 v11, v1, v3, vcc 
6004; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 64, v14 6005; GFX6-NEXT: v_lshr_b64 v[0:1], v[4:5], v14 6006; GFX6-NEXT: v_lshl_b64 v[2:3], v[6:7], v2 6007; GFX6-NEXT: v_subrev_i32_e32 v15, vcc, 64, v14 6008; GFX6-NEXT: v_or_b32_e32 v2, v0, v2 6009; GFX6-NEXT: v_or_b32_e32 v3, v1, v3 6010; GFX6-NEXT: v_lshr_b64 v[0:1], v[6:7], v15 6011; GFX6-NEXT: v_lshr_b64 v[8:9], v[6:7], v14 6012; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14 6013; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 6014; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 6015; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14 6016; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5] 6017; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[4:5] 6018; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc 6019; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc 6020; GFX6-NEXT: v_or_b32_e32 v0, v12, v0 6021; GFX6-NEXT: v_or_b32_e32 v1, v13, v1 6022; GFX6-NEXT: v_or_b32_e32 v2, v10, v2 6023; GFX6-NEXT: v_or_b32_e32 v3, v11, v3 6024; GFX6-NEXT: s_setpc_b64 s[30:31] 6025; 6026; GFX8-LABEL: v_fshr_i128: 6027; GFX8: ; %bb.0: 6028; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6029; GFX8-NEXT: v_and_b32_e32 v14, 0x7f, v8 6030; GFX8-NEXT: v_xor_b32_e32 v8, -1, v8 6031; GFX8-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 6032; GFX8-NEXT: v_and_b32_e32 v15, 0x7f, v8 6033; GFX8-NEXT: v_lshlrev_b64 v[8:9], 1, v[0:1] 6034; GFX8-NEXT: v_lshrrev_b32_e32 v0, 31, v1 6035; GFX8-NEXT: v_or_b32_e32 v2, v2, v0 6036; GFX8-NEXT: v_sub_u32_e32 v0, vcc, 64, v15 6037; GFX8-NEXT: v_lshrrev_b64 v[0:1], v0, v[8:9] 6038; GFX8-NEXT: v_lshlrev_b64 v[10:11], v15, v[2:3] 6039; GFX8-NEXT: v_subrev_u32_e32 v16, vcc, 64, v15 6040; GFX8-NEXT: v_lshlrev_b64 v[12:13], v15, v[8:9] 6041; GFX8-NEXT: v_or_b32_e32 v10, v0, v10 6042; GFX8-NEXT: v_or_b32_e32 v11, v1, v11 6043; GFX8-NEXT: v_lshlrev_b64 v[0:1], v16, v[8:9] 6044; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15 6045; GFX8-NEXT: v_cndmask_b32_e32 v12, 0, v12, vcc 6046; GFX8-NEXT: v_cndmask_b32_e32 v13, 0, v13, vcc 6047; GFX8-NEXT: v_cndmask_b32_e32 v0, 
v0, v10, vcc 6048; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 6049; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v15 6050; GFX8-NEXT: v_cndmask_b32_e32 v10, v0, v2, vcc 6051; GFX8-NEXT: v_cndmask_b32_e32 v11, v1, v3, vcc 6052; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 64, v14 6053; GFX8-NEXT: v_lshrrev_b64 v[0:1], v14, v[4:5] 6054; GFX8-NEXT: v_lshlrev_b64 v[2:3], v2, v[6:7] 6055; GFX8-NEXT: v_subrev_u32_e32 v15, vcc, 64, v14 6056; GFX8-NEXT: v_or_b32_e32 v2, v0, v2 6057; GFX8-NEXT: v_or_b32_e32 v3, v1, v3 6058; GFX8-NEXT: v_lshrrev_b64 v[0:1], v15, v[6:7] 6059; GFX8-NEXT: v_lshrrev_b64 v[8:9], v14, v[6:7] 6060; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14 6061; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 6062; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 6063; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14 6064; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5] 6065; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[4:5] 6066; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc 6067; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc 6068; GFX8-NEXT: v_or_b32_e32 v0, v12, v0 6069; GFX8-NEXT: v_or_b32_e32 v1, v13, v1 6070; GFX8-NEXT: v_or_b32_e32 v2, v10, v2 6071; GFX8-NEXT: v_or_b32_e32 v3, v11, v3 6072; GFX8-NEXT: s_setpc_b64 s[30:31] 6073; 6074; GFX9-LABEL: v_fshr_i128: 6075; GFX9: ; %bb.0: 6076; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6077; GFX9-NEXT: v_and_b32_e32 v14, 0x7f, v8 6078; GFX9-NEXT: v_xor_b32_e32 v8, -1, v8 6079; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 6080; GFX9-NEXT: v_and_b32_e32 v15, 0x7f, v8 6081; GFX9-NEXT: v_lshlrev_b64 v[8:9], 1, v[0:1] 6082; GFX9-NEXT: v_lshrrev_b32_e32 v0, 31, v1 6083; GFX9-NEXT: v_or_b32_e32 v2, v2, v0 6084; GFX9-NEXT: v_sub_u32_e32 v0, 64, v15 6085; GFX9-NEXT: v_lshrrev_b64 v[0:1], v0, v[8:9] 6086; GFX9-NEXT: v_lshlrev_b64 v[10:11], v15, v[2:3] 6087; GFX9-NEXT: v_subrev_u32_e32 v16, 64, v15 6088; GFX9-NEXT: v_lshlrev_b64 v[12:13], v15, v[8:9] 6089; GFX9-NEXT: v_or_b32_e32 v10, v0, v10 6090; GFX9-NEXT: v_or_b32_e32 v11, v1, v11 6091; GFX9-NEXT: 
v_lshlrev_b64 v[0:1], v16, v[8:9] 6092; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15 6093; GFX9-NEXT: v_cndmask_b32_e32 v12, 0, v12, vcc 6094; GFX9-NEXT: v_cndmask_b32_e32 v13, 0, v13, vcc 6095; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 6096; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 6097; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v15 6098; GFX9-NEXT: v_cndmask_b32_e32 v10, v0, v2, vcc 6099; GFX9-NEXT: v_sub_u32_e32 v2, 64, v14 6100; GFX9-NEXT: v_cndmask_b32_e32 v11, v1, v3, vcc 6101; GFX9-NEXT: v_lshrrev_b64 v[0:1], v14, v[4:5] 6102; GFX9-NEXT: v_lshlrev_b64 v[2:3], v2, v[6:7] 6103; GFX9-NEXT: v_subrev_u32_e32 v15, 64, v14 6104; GFX9-NEXT: v_or_b32_e32 v2, v0, v2 6105; GFX9-NEXT: v_or_b32_e32 v3, v1, v3 6106; GFX9-NEXT: v_lshrrev_b64 v[0:1], v15, v[6:7] 6107; GFX9-NEXT: v_lshrrev_b64 v[8:9], v14, v[6:7] 6108; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14 6109; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 6110; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 6111; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14 6112; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5] 6113; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[4:5] 6114; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc 6115; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc 6116; GFX9-NEXT: v_or_b32_e32 v0, v12, v0 6117; GFX9-NEXT: v_or_b32_e32 v1, v13, v1 6118; GFX9-NEXT: v_or_b32_e32 v2, v10, v2 6119; GFX9-NEXT: v_or_b32_e32 v3, v11, v3 6120; GFX9-NEXT: s_setpc_b64 s[30:31] 6121; 6122; GFX10-LABEL: v_fshr_i128: 6123; GFX10: ; %bb.0: 6124; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6125; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 6126; GFX10-NEXT: v_xor_b32_e32 v9, -1, v8 6127; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 6128; GFX10-NEXT: v_lshrrev_b32_e32 v10, 31, v1 6129; GFX10-NEXT: v_and_b32_e32 v19, 0x7f, v8 6130; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 6131; GFX10-NEXT: v_and_b32_e32 v18, 0x7f, v9 6132; GFX10-NEXT: v_or_b32_e32 v2, v2, v10 6133; GFX10-NEXT: v_sub_nc_u32_e32 v16, 64, v19 6134; GFX10-NEXT: v_subrev_nc_u32_e32 
v21, 64, v19 6135; GFX10-NEXT: v_sub_nc_u32_e32 v10, 64, v18 6136; GFX10-NEXT: v_subrev_nc_u32_e32 v20, 64, v18 6137; GFX10-NEXT: v_lshlrev_b64 v[8:9], v18, v[2:3] 6138; GFX10-NEXT: v_lshrrev_b64 v[12:13], v19, v[4:5] 6139; GFX10-NEXT: v_lshlrev_b64 v[16:17], v16, v[6:7] 6140; GFX10-NEXT: v_lshrrev_b64 v[10:11], v10, v[0:1] 6141; GFX10-NEXT: v_lshlrev_b64 v[14:15], v18, v[0:1] 6142; GFX10-NEXT: v_lshlrev_b64 v[0:1], v20, v[0:1] 6143; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v18 6144; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v19 6145; GFX10-NEXT: v_or_b32_e32 v12, v12, v16 6146; GFX10-NEXT: v_or_b32_e32 v10, v10, v8 6147; GFX10-NEXT: v_or_b32_e32 v11, v11, v9 6148; GFX10-NEXT: v_lshrrev_b64 v[8:9], v21, v[6:7] 6149; GFX10-NEXT: v_or_b32_e32 v13, v13, v17 6150; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 0, v19 6151; GFX10-NEXT: v_cndmask_b32_e32 v10, v0, v10, vcc_lo 6152; GFX10-NEXT: v_cndmask_b32_e32 v11, v1, v11, vcc_lo 6153; GFX10-NEXT: v_lshrrev_b64 v[0:1], v19, v[6:7] 6154; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v12, s4 6155; GFX10-NEXT: v_cmp_eq_u32_e64 s6, 0, v18 6156; GFX10-NEXT: v_cndmask_b32_e64 v6, v9, v13, s4 6157; GFX10-NEXT: v_cndmask_b32_e32 v14, 0, v14, vcc_lo 6158; GFX10-NEXT: v_cndmask_b32_e32 v7, 0, v15, vcc_lo 6159; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, v4, s5 6160; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, v2, s6 6161; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, v3, s6 6162; GFX10-NEXT: v_cndmask_b32_e64 v5, v6, v5, s5 6163; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, v0, s4 6164; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, v1, s4 6165; GFX10-NEXT: v_or_b32_e32 v0, v14, v4 6166; GFX10-NEXT: v_or_b32_e32 v1, v7, v5 6167; GFX10-NEXT: v_or_b32_e32 v2, v2, v6 6168; GFX10-NEXT: v_or_b32_e32 v3, v3, v8 6169; GFX10-NEXT: s_setpc_b64 s[30:31] 6170; 6171; GFX11-LABEL: v_fshr_i128: 6172; GFX11: ; %bb.0: 6173; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6174; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 6175; GFX11-NEXT: v_xor_b32_e32 v9, -1, v8 6176; GFX11-NEXT: v_lshrrev_b32_e32 v10, 31, v1 
6177; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 6178; GFX11-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 6179; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) 6180; GFX11-NEXT: v_and_b32_e32 v18, 0x7f, v9 6181; GFX11-NEXT: v_or_b32_e32 v2, v2, v10 6182; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 6183; GFX11-NEXT: v_lshlrev_b64 v[14:15], v18, v[0:1] 6184; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v18 6185; GFX11-NEXT: v_and_b32_e32 v19, 0x7f, v8 6186; GFX11-NEXT: v_sub_nc_u32_e32 v10, 64, v18 6187; GFX11-NEXT: v_subrev_nc_u32_e32 v20, 64, v18 6188; GFX11-NEXT: v_lshlrev_b64 v[8:9], v18, v[2:3] 6189; GFX11-NEXT: v_cndmask_b32_e32 v14, 0, v14, vcc_lo 6190; GFX11-NEXT: v_sub_nc_u32_e32 v16, 64, v19 6191; GFX11-NEXT: v_lshrrev_b64 v[10:11], v10, v[0:1] 6192; GFX11-NEXT: v_subrev_nc_u32_e32 v21, 64, v19 6193; GFX11-NEXT: v_lshrrev_b64 v[12:13], v19, v[4:5] 6194; GFX11-NEXT: v_lshlrev_b64 v[0:1], v20, v[0:1] 6195; GFX11-NEXT: v_lshlrev_b64 v[16:17], v16, v[6:7] 6196; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v19 6197; GFX11-NEXT: v_or_b32_e32 v10, v10, v8 6198; GFX11-NEXT: v_or_b32_e32 v11, v11, v9 6199; GFX11-NEXT: v_lshrrev_b64 v[8:9], v21, v[6:7] 6200; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v19 6201; GFX11-NEXT: v_or_b32_e32 v12, v12, v16 6202; GFX11-NEXT: v_or_b32_e32 v13, v13, v17 6203; GFX11-NEXT: v_dual_cndmask_b32 v10, v0, v10 :: v_dual_cndmask_b32 v11, v1, v11 6204; GFX11-NEXT: v_lshrrev_b64 v[0:1], v19, v[6:7] 6205; GFX11-NEXT: v_cndmask_b32_e32 v7, 0, v15, vcc_lo 6206; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, v12, s0 6207; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v18 6208; GFX11-NEXT: v_cndmask_b32_e64 v6, v9, v13, s0 6209; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 6210; GFX11-NEXT: v_cndmask_b32_e64 v4, v8, v4, s1 6211; GFX11-NEXT: v_cndmask_b32_e64 v2, v10, v2, s2 6212; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s2 6213; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4) 
6214; GFX11-NEXT: v_cndmask_b32_e64 v5, v6, v5, s1 6215; GFX11-NEXT: v_cndmask_b32_e64 v6, 0, v0, s0 6216; GFX11-NEXT: v_cndmask_b32_e64 v8, 0, v1, s0 6217; GFX11-NEXT: v_or_b32_e32 v0, v14, v4 6218; GFX11-NEXT: v_or_b32_e32 v1, v7, v5 6219; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 6220; GFX11-NEXT: v_or_b32_e32 v2, v2, v6 6221; GFX11-NEXT: v_or_b32_e32 v3, v3, v8 6222; GFX11-NEXT: s_setpc_b64 s[30:31] 6223 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt) 6224 ret i128 %result 6225} 6226 6227define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs, i128 %amt) { 6228; GFX6-LABEL: v_fshr_i128_ssv: 6229; GFX6: ; %bb.0: 6230; GFX6-NEXT: v_and_b32_e32 v6, 0x7f, v0 6231; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0 6232; GFX6-NEXT: s_mov_b32 s9, 0 6233; GFX6-NEXT: v_and_b32_e32 v7, 0x7f, v0 6234; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 6235; GFX6-NEXT: s_lshr_b32 s8, s1, 31 6236; GFX6-NEXT: s_lshl_b64 s[10:11], s[0:1], 1 6237; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9] 6238; GFX6-NEXT: v_sub_i32_e32 v0, vcc, 64, v7 6239; GFX6-NEXT: v_lshr_b64 v[0:1], s[10:11], v0 6240; GFX6-NEXT: v_lshl_b64 v[2:3], s[0:1], v7 6241; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, 64, v7 6242; GFX6-NEXT: v_lshl_b64 v[4:5], s[10:11], v7 6243; GFX6-NEXT: v_or_b32_e32 v2, v0, v2 6244; GFX6-NEXT: v_or_b32_e32 v3, v1, v3 6245; GFX6-NEXT: v_lshl_b64 v[0:1], s[10:11], v8 6246; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7 6247; GFX6-NEXT: v_cndmask_b32_e32 v8, 0, v4, vcc 6248; GFX6-NEXT: v_cndmask_b32_e32 v9, 0, v5, vcc 6249; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 6250; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 6251; GFX6-NEXT: v_mov_b32_e32 v2, s0 6252; GFX6-NEXT: v_mov_b32_e32 v3, s1 6253; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 6254; GFX6-NEXT: v_cndmask_b32_e32 v7, v0, v2, vcc 6255; GFX6-NEXT: v_cndmask_b32_e32 v10, v1, v3, vcc 6256; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 64, v6 6257; GFX6-NEXT: v_lshr_b64 v[0:1], s[4:5], v6 6258; 
GFX6-NEXT: v_lshl_b64 v[2:3], s[6:7], v2 6259; GFX6-NEXT: v_subrev_i32_e32 v11, vcc, 64, v6 6260; GFX6-NEXT: v_or_b32_e32 v2, v0, v2 6261; GFX6-NEXT: v_or_b32_e32 v3, v1, v3 6262; GFX6-NEXT: v_lshr_b64 v[0:1], s[6:7], v11 6263; GFX6-NEXT: v_lshr_b64 v[4:5], s[6:7], v6 6264; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6 6265; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 6266; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 6267; GFX6-NEXT: v_mov_b32_e32 v2, s4 6268; GFX6-NEXT: v_mov_b32_e32 v3, s5 6269; GFX6-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v6 6270; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] 6271; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] 6272; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc 6273; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc 6274; GFX6-NEXT: v_or_b32_e32 v0, v8, v0 6275; GFX6-NEXT: v_or_b32_e32 v1, v9, v1 6276; GFX6-NEXT: v_or_b32_e32 v2, v7, v2 6277; GFX6-NEXT: v_or_b32_e32 v3, v10, v3 6278; GFX6-NEXT: ; return to shader part epilog 6279; 6280; GFX8-LABEL: v_fshr_i128_ssv: 6281; GFX8: ; %bb.0: 6282; GFX8-NEXT: v_and_b32_e32 v6, 0x7f, v0 6283; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0 6284; GFX8-NEXT: s_mov_b32 s9, 0 6285; GFX8-NEXT: v_and_b32_e32 v7, 0x7f, v0 6286; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 6287; GFX8-NEXT: s_lshr_b32 s8, s1, 31 6288; GFX8-NEXT: s_lshl_b64 s[10:11], s[0:1], 1 6289; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9] 6290; GFX8-NEXT: v_sub_u32_e32 v0, vcc, 64, v7 6291; GFX8-NEXT: v_lshrrev_b64 v[0:1], v0, s[10:11] 6292; GFX8-NEXT: v_lshlrev_b64 v[2:3], v7, s[0:1] 6293; GFX8-NEXT: v_subrev_u32_e32 v8, vcc, 64, v7 6294; GFX8-NEXT: v_lshlrev_b64 v[4:5], v7, s[10:11] 6295; GFX8-NEXT: v_or_b32_e32 v2, v0, v2 6296; GFX8-NEXT: v_or_b32_e32 v3, v1, v3 6297; GFX8-NEXT: v_lshlrev_b64 v[0:1], v8, s[10:11] 6298; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7 6299; GFX8-NEXT: v_cndmask_b32_e32 v8, 0, v4, vcc 6300; GFX8-NEXT: v_cndmask_b32_e32 v9, 0, v5, vcc 6301; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 6302; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, 
vcc 6303; GFX8-NEXT: v_mov_b32_e32 v2, s0 6304; GFX8-NEXT: v_mov_b32_e32 v3, s1 6305; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 6306; GFX8-NEXT: v_cndmask_b32_e32 v7, v0, v2, vcc 6307; GFX8-NEXT: v_cndmask_b32_e32 v10, v1, v3, vcc 6308; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 64, v6 6309; GFX8-NEXT: v_lshrrev_b64 v[0:1], v6, s[4:5] 6310; GFX8-NEXT: v_lshlrev_b64 v[2:3], v2, s[6:7] 6311; GFX8-NEXT: v_subrev_u32_e32 v11, vcc, 64, v6 6312; GFX8-NEXT: v_or_b32_e32 v2, v0, v2 6313; GFX8-NEXT: v_or_b32_e32 v3, v1, v3 6314; GFX8-NEXT: v_lshrrev_b64 v[0:1], v11, s[6:7] 6315; GFX8-NEXT: v_lshrrev_b64 v[4:5], v6, s[6:7] 6316; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6 6317; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 6318; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 6319; GFX8-NEXT: v_mov_b32_e32 v2, s4 6320; GFX8-NEXT: v_mov_b32_e32 v3, s5 6321; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v6 6322; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] 6323; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] 6324; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc 6325; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc 6326; GFX8-NEXT: v_or_b32_e32 v0, v8, v0 6327; GFX8-NEXT: v_or_b32_e32 v1, v9, v1 6328; GFX8-NEXT: v_or_b32_e32 v2, v7, v2 6329; GFX8-NEXT: v_or_b32_e32 v3, v10, v3 6330; GFX8-NEXT: ; return to shader part epilog 6331; 6332; GFX9-LABEL: v_fshr_i128_ssv: 6333; GFX9: ; %bb.0: 6334; GFX9-NEXT: v_and_b32_e32 v6, 0x7f, v0 6335; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0 6336; GFX9-NEXT: s_mov_b32 s9, 0 6337; GFX9-NEXT: v_and_b32_e32 v7, 0x7f, v0 6338; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 6339; GFX9-NEXT: s_lshr_b32 s8, s1, 31 6340; GFX9-NEXT: s_lshl_b64 s[10:11], s[0:1], 1 6341; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9] 6342; GFX9-NEXT: v_sub_u32_e32 v0, 64, v7 6343; GFX9-NEXT: v_lshrrev_b64 v[0:1], v0, s[10:11] 6344; GFX9-NEXT: v_lshlrev_b64 v[2:3], v7, s[0:1] 6345; GFX9-NEXT: v_subrev_u32_e32 v8, 64, v7 6346; GFX9-NEXT: v_lshlrev_b64 v[4:5], v7, s[10:11] 6347; GFX9-NEXT: v_or_b32_e32 v2, v0, v2 6348; 
GFX9-NEXT: v_or_b32_e32 v3, v1, v3 6349; GFX9-NEXT: v_lshlrev_b64 v[0:1], v8, s[10:11] 6350; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7 6351; GFX9-NEXT: v_cndmask_b32_e32 v8, 0, v4, vcc 6352; GFX9-NEXT: v_cndmask_b32_e32 v9, 0, v5, vcc 6353; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 6354; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 6355; GFX9-NEXT: v_mov_b32_e32 v2, s0 6356; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 6357; GFX9-NEXT: v_mov_b32_e32 v3, s1 6358; GFX9-NEXT: v_cndmask_b32_e32 v7, v0, v2, vcc 6359; GFX9-NEXT: v_sub_u32_e32 v2, 64, v6 6360; GFX9-NEXT: v_cndmask_b32_e32 v10, v1, v3, vcc 6361; GFX9-NEXT: v_lshrrev_b64 v[0:1], v6, s[4:5] 6362; GFX9-NEXT: v_lshlrev_b64 v[2:3], v2, s[6:7] 6363; GFX9-NEXT: v_subrev_u32_e32 v11, 64, v6 6364; GFX9-NEXT: v_or_b32_e32 v2, v0, v2 6365; GFX9-NEXT: v_or_b32_e32 v3, v1, v3 6366; GFX9-NEXT: v_lshrrev_b64 v[0:1], v11, s[6:7] 6367; GFX9-NEXT: v_lshrrev_b64 v[4:5], v6, s[6:7] 6368; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6 6369; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 6370; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 6371; GFX9-NEXT: v_mov_b32_e32 v2, s4 6372; GFX9-NEXT: v_mov_b32_e32 v3, s5 6373; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v6 6374; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] 6375; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] 6376; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc 6377; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc 6378; GFX9-NEXT: v_or_b32_e32 v0, v8, v0 6379; GFX9-NEXT: v_or_b32_e32 v1, v9, v1 6380; GFX9-NEXT: v_or_b32_e32 v2, v7, v2 6381; GFX9-NEXT: v_or_b32_e32 v3, v10, v3 6382; GFX9-NEXT: ; return to shader part epilog 6383; 6384; GFX10-LABEL: v_fshr_i128_ssv: 6385; GFX10: ; %bb.0: 6386; GFX10-NEXT: v_xor_b32_e32 v1, -1, v0 6387; GFX10-NEXT: v_and_b32_e32 v13, 0x7f, v0 6388; GFX10-NEXT: s_mov_b32 s9, 0 6389; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 6390; GFX10-NEXT: s_lshr_b32 s8, s1, 31 6391; GFX10-NEXT: v_and_b32_e32 v12, 0x7f, v1 6392; GFX10-NEXT: v_sub_nc_u32_e32 v8, 64, v13 6393; 
GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 6394; GFX10-NEXT: s_or_b64 s[8:9], s[2:3], s[8:9] 6395; GFX10-NEXT: v_subrev_nc_u32_e32 v14, 64, v13 6396; GFX10-NEXT: v_sub_nc_u32_e32 v2, 64, v12 6397; GFX10-NEXT: v_lshlrev_b64 v[0:1], v12, s[8:9] 6398; GFX10-NEXT: v_subrev_nc_u32_e32 v10, 64, v12 6399; GFX10-NEXT: v_lshrrev_b64 v[4:5], v13, s[4:5] 6400; GFX10-NEXT: v_lshlrev_b64 v[8:9], v8, s[6:7] 6401; GFX10-NEXT: v_lshrrev_b64 v[2:3], v2, s[0:1] 6402; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v12 6403; GFX10-NEXT: v_lshlrev_b64 v[10:11], v10, s[0:1] 6404; GFX10-NEXT: v_lshlrev_b64 v[6:7], v12, s[0:1] 6405; GFX10-NEXT: v_cmp_gt_u32_e64 s0, 64, v13 6406; GFX10-NEXT: v_or_b32_e32 v4, v4, v8 6407; GFX10-NEXT: v_or_b32_e32 v2, v2, v0 6408; GFX10-NEXT: v_or_b32_e32 v3, v3, v1 6409; GFX10-NEXT: v_lshrrev_b64 v[0:1], v14, s[6:7] 6410; GFX10-NEXT: v_or_b32_e32 v5, v5, v9 6411; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 0, v13 6412; GFX10-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc_lo 6413; GFX10-NEXT: v_cndmask_b32_e32 v10, v11, v3, vcc_lo 6414; GFX10-NEXT: v_lshrrev_b64 v[2:3], v13, s[6:7] 6415; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v4, s0 6416; GFX10-NEXT: v_cmp_eq_u32_e64 s2, 0, v12 6417; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v5, s0 6418; GFX10-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc_lo 6419; GFX10-NEXT: v_cndmask_b32_e32 v4, 0, v7, vcc_lo 6420; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s4, s1 6421; GFX10-NEXT: v_cndmask_b32_e64 v5, v8, s8, s2 6422; GFX10-NEXT: v_cndmask_b32_e64 v7, v10, s9, s2 6423; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s5, s1 6424; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v2, s0 6425; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, v3, s0 6426; GFX10-NEXT: v_or_b32_e32 v0, v6, v0 6427; GFX10-NEXT: v_or_b32_e32 v1, v4, v1 6428; GFX10-NEXT: v_or_b32_e32 v2, v5, v2 6429; GFX10-NEXT: v_or_b32_e32 v3, v7, v3 6430; GFX10-NEXT: ; return to shader part epilog 6431; 6432; GFX11-LABEL: v_fshr_i128_ssv: 6433; GFX11: ; %bb.0: 6434; GFX11-NEXT: v_xor_b32_e32 v1, -1, v0 6435; GFX11-NEXT: s_lshr_b32 s8, 
s1, 31 6436; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 6437; GFX11-NEXT: s_mov_b32 s9, 0 6438; GFX11-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 6439; GFX11-NEXT: v_and_b32_e32 v12, 0x7f, v1 6440; GFX11-NEXT: s_or_b64 s[8:9], s[2:3], s[8:9] 6441; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 6442; GFX11-NEXT: v_lshlrev_b64 v[6:7], v12, s[0:1] 6443; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v12 6444; GFX11-NEXT: v_and_b32_e32 v13, 0x7f, v0 6445; GFX11-NEXT: v_sub_nc_u32_e32 v2, 64, v12 6446; GFX11-NEXT: v_lshlrev_b64 v[0:1], v12, s[8:9] 6447; GFX11-NEXT: v_subrev_nc_u32_e32 v10, 64, v12 6448; GFX11-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc_lo 6449; GFX11-NEXT: v_sub_nc_u32_e32 v8, 64, v13 6450; GFX11-NEXT: v_lshrrev_b64 v[2:3], v2, s[0:1] 6451; GFX11-NEXT: v_subrev_nc_u32_e32 v14, 64, v13 6452; GFX11-NEXT: v_lshrrev_b64 v[4:5], v13, s[4:5] 6453; GFX11-NEXT: v_lshlrev_b64 v[10:11], v10, s[0:1] 6454; GFX11-NEXT: v_lshlrev_b64 v[8:9], v8, s[6:7] 6455; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v13 6456; GFX11-NEXT: v_or_b32_e32 v2, v2, v0 6457; GFX11-NEXT: v_or_b32_e32 v3, v3, v1 6458; GFX11-NEXT: v_lshrrev_b64 v[0:1], v14, s[6:7] 6459; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v13 6460; GFX11-NEXT: v_or_b32_e32 v4, v4, v8 6461; GFX11-NEXT: v_or_b32_e32 v5, v5, v9 6462; GFX11-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc_lo 6463; GFX11-NEXT: v_cndmask_b32_e32 v10, v11, v3, vcc_lo 6464; GFX11-NEXT: v_lshrrev_b64 v[2:3], v13, s[6:7] 6465; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v4, s0 6466; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v12 6467; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v5, s0 6468; GFX11-NEXT: v_cndmask_b32_e32 v4, 0, v7, vcc_lo 6469; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 6470; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s4, s1 6471; GFX11-NEXT: v_cndmask_b32_e64 v5, v8, s8, s2 6472; GFX11-NEXT: v_cndmask_b32_e64 v7, v10, s9, s2 6473; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s5, s1 6474; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v2, s0 6475; GFX11-NEXT: v_cndmask_b32_e64 
v3, 0, v3, s0 6476; GFX11-NEXT: v_or_b32_e32 v0, v6, v0 6477; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 6478; GFX11-NEXT: v_or_b32_e32 v1, v4, v1 6479; GFX11-NEXT: v_or_b32_e32 v2, v5, v2 6480; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) 6481; GFX11-NEXT: v_or_b32_e32 v3, v7, v3 6482; GFX11-NEXT: ; return to shader part epilog 6483 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt) 6484 %cast.result = bitcast i128 %result to <4 x float> 6485 ret <4 x float> %cast.result 6486} 6487 6488define amdgpu_ps <4 x float> @v_fshr_i128_svs(i128 inreg %lhs, i128 %rhs, i128 inreg %amt) { 6489; GFX6-LABEL: v_fshr_i128_svs: 6490; GFX6: ; %bb.0: 6491; GFX6-NEXT: s_movk_i32 s6, 0x7f 6492; GFX6-NEXT: s_mov_b32 s7, 0 6493; GFX6-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] 6494; GFX6-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] 6495; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 6496; GFX6-NEXT: s_lshr_b32 s6, s1, 31 6497; GFX6-NEXT: s_lshl_b64 s[10:11], s[0:1], 1 6498; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[6:7] 6499; GFX6-NEXT: s_sub_i32 s9, s4, 64 6500; GFX6-NEXT: s_sub_i32 s5, 64, s4 6501; GFX6-NEXT: s_cmp_lt_u32 s4, 64 6502; GFX6-NEXT: s_cselect_b32 s12, 1, 0 6503; GFX6-NEXT: s_cmp_eq_u32 s4, 0 6504; GFX6-NEXT: s_cselect_b32 s13, 1, 0 6505; GFX6-NEXT: s_lshl_b64 s[2:3], s[10:11], s4 6506; GFX6-NEXT: s_lshr_b64 s[6:7], s[10:11], s5 6507; GFX6-NEXT: s_lshl_b64 s[4:5], s[0:1], s4 6508; GFX6-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5] 6509; GFX6-NEXT: s_lshl_b64 s[6:7], s[10:11], s9 6510; GFX6-NEXT: s_cmp_lg_u32 s12, 0 6511; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 6512; GFX6-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] 6513; GFX6-NEXT: s_cmp_lg_u32 s13, 0 6514; GFX6-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5] 6515; GFX6-NEXT: s_sub_i32 s4, s8, 64 6516; GFX6-NEXT: s_sub_i32 s5, 64, s8 6517; GFX6-NEXT: s_cmp_lt_u32 s8, 64 6518; GFX6-NEXT: s_cselect_b32 s6, 1, 0 6519; GFX6-NEXT: s_cmp_eq_u32 s8, 0 6520; GFX6-NEXT: v_lshr_b64 v[4:5], v[0:1], s8 6521; 
GFX6-NEXT: v_lshl_b64 v[6:7], v[2:3], s5 6522; GFX6-NEXT: s_cselect_b32 s7, 1, 0 6523; GFX6-NEXT: v_lshr_b64 v[8:9], v[2:3], s8 6524; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], s4 6525; GFX6-NEXT: s_and_b32 s4, 1, s6 6526; GFX6-NEXT: v_or_b32_e32 v4, v4, v6 6527; GFX6-NEXT: v_or_b32_e32 v5, v5, v7 6528; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 6529; GFX6-NEXT: s_and_b32 s4, 1, s7 6530; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 6531; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc 6532; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 6533; GFX6-NEXT: s_and_b32 s4, 1, s6 6534; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 6535; GFX6-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc 6536; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 6537; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc 6538; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc 6539; GFX6-NEXT: v_or_b32_e32 v0, s2, v0 6540; GFX6-NEXT: v_or_b32_e32 v1, s3, v1 6541; GFX6-NEXT: v_or_b32_e32 v2, s0, v2 6542; GFX6-NEXT: v_or_b32_e32 v3, s1, v3 6543; GFX6-NEXT: ; return to shader part epilog 6544; 6545; GFX8-LABEL: v_fshr_i128_svs: 6546; GFX8: ; %bb.0: 6547; GFX8-NEXT: s_movk_i32 s6, 0x7f 6548; GFX8-NEXT: s_mov_b32 s7, 0 6549; GFX8-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] 6550; GFX8-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] 6551; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 6552; GFX8-NEXT: s_lshr_b32 s6, s1, 31 6553; GFX8-NEXT: s_lshl_b64 s[10:11], s[0:1], 1 6554; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[6:7] 6555; GFX8-NEXT: s_sub_i32 s9, s4, 64 6556; GFX8-NEXT: s_sub_i32 s5, 64, s4 6557; GFX8-NEXT: s_cmp_lt_u32 s4, 64 6558; GFX8-NEXT: s_cselect_b32 s12, 1, 0 6559; GFX8-NEXT: s_cmp_eq_u32 s4, 0 6560; GFX8-NEXT: s_cselect_b32 s13, 1, 0 6561; GFX8-NEXT: s_lshl_b64 s[2:3], s[10:11], s4 6562; GFX8-NEXT: s_lshr_b64 s[6:7], s[10:11], s5 6563; GFX8-NEXT: s_lshl_b64 s[4:5], s[0:1], s4 6564; GFX8-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5] 6565; GFX8-NEXT: s_lshl_b64 s[6:7], s[10:11], s9 6566; GFX8-NEXT: s_cmp_lg_u32 s12, 0 6567; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 
6568; GFX8-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] 6569; GFX8-NEXT: s_cmp_lg_u32 s13, 0 6570; GFX8-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5] 6571; GFX8-NEXT: s_sub_i32 s4, s8, 64 6572; GFX8-NEXT: s_sub_i32 s5, 64, s8 6573; GFX8-NEXT: s_cmp_lt_u32 s8, 64 6574; GFX8-NEXT: s_cselect_b32 s6, 1, 0 6575; GFX8-NEXT: s_cmp_eq_u32 s8, 0 6576; GFX8-NEXT: v_lshrrev_b64 v[4:5], s8, v[0:1] 6577; GFX8-NEXT: v_lshlrev_b64 v[6:7], s5, v[2:3] 6578; GFX8-NEXT: s_cselect_b32 s7, 1, 0 6579; GFX8-NEXT: v_lshrrev_b64 v[8:9], s8, v[2:3] 6580; GFX8-NEXT: v_lshrrev_b64 v[2:3], s4, v[2:3] 6581; GFX8-NEXT: s_and_b32 s4, 1, s6 6582; GFX8-NEXT: v_or_b32_e32 v4, v4, v6 6583; GFX8-NEXT: v_or_b32_e32 v5, v5, v7 6584; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 6585; GFX8-NEXT: s_and_b32 s4, 1, s7 6586; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 6587; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc 6588; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 6589; GFX8-NEXT: s_and_b32 s4, 1, s6 6590; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 6591; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc 6592; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 6593; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc 6594; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc 6595; GFX8-NEXT: v_or_b32_e32 v0, s2, v0 6596; GFX8-NEXT: v_or_b32_e32 v1, s3, v1 6597; GFX8-NEXT: v_or_b32_e32 v2, s0, v2 6598; GFX8-NEXT: v_or_b32_e32 v3, s1, v3 6599; GFX8-NEXT: ; return to shader part epilog 6600; 6601; GFX9-LABEL: v_fshr_i128_svs: 6602; GFX9: ; %bb.0: 6603; GFX9-NEXT: s_movk_i32 s6, 0x7f 6604; GFX9-NEXT: s_mov_b32 s7, 0 6605; GFX9-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] 6606; GFX9-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] 6607; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 6608; GFX9-NEXT: s_lshr_b32 s6, s1, 31 6609; GFX9-NEXT: s_lshl_b64 s[10:11], s[0:1], 1 6610; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[6:7] 6611; GFX9-NEXT: s_sub_i32 s9, s4, 64 6612; GFX9-NEXT: s_sub_i32 s5, 64, s4 6613; GFX9-NEXT: s_cmp_lt_u32 s4, 64 6614; GFX9-NEXT: s_cselect_b32 s12, 1, 0 6615; 
GFX9-NEXT: s_cmp_eq_u32 s4, 0 6616; GFX9-NEXT: s_cselect_b32 s13, 1, 0 6617; GFX9-NEXT: s_lshl_b64 s[2:3], s[10:11], s4 6618; GFX9-NEXT: s_lshr_b64 s[6:7], s[10:11], s5 6619; GFX9-NEXT: s_lshl_b64 s[4:5], s[0:1], s4 6620; GFX9-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5] 6621; GFX9-NEXT: s_lshl_b64 s[6:7], s[10:11], s9 6622; GFX9-NEXT: s_cmp_lg_u32 s12, 0 6623; GFX9-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 6624; GFX9-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] 6625; GFX9-NEXT: s_cmp_lg_u32 s13, 0 6626; GFX9-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5] 6627; GFX9-NEXT: s_sub_i32 s4, s8, 64 6628; GFX9-NEXT: s_sub_i32 s5, 64, s8 6629; GFX9-NEXT: s_cmp_lt_u32 s8, 64 6630; GFX9-NEXT: s_cselect_b32 s6, 1, 0 6631; GFX9-NEXT: s_cmp_eq_u32 s8, 0 6632; GFX9-NEXT: v_lshrrev_b64 v[4:5], s8, v[0:1] 6633; GFX9-NEXT: v_lshlrev_b64 v[6:7], s5, v[2:3] 6634; GFX9-NEXT: s_cselect_b32 s7, 1, 0 6635; GFX9-NEXT: v_lshrrev_b64 v[8:9], s8, v[2:3] 6636; GFX9-NEXT: v_lshrrev_b64 v[2:3], s4, v[2:3] 6637; GFX9-NEXT: s_and_b32 s4, 1, s6 6638; GFX9-NEXT: v_or_b32_e32 v4, v4, v6 6639; GFX9-NEXT: v_or_b32_e32 v5, v5, v7 6640; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 6641; GFX9-NEXT: s_and_b32 s4, 1, s7 6642; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 6643; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc 6644; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 6645; GFX9-NEXT: s_and_b32 s4, 1, s6 6646; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 6647; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc 6648; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 6649; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc 6650; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc 6651; GFX9-NEXT: v_or_b32_e32 v0, s2, v0 6652; GFX9-NEXT: v_or_b32_e32 v1, s3, v1 6653; GFX9-NEXT: v_or_b32_e32 v2, s0, v2 6654; GFX9-NEXT: v_or_b32_e32 v3, s1, v3 6655; GFX9-NEXT: ; return to shader part epilog 6656; 6657; GFX10-LABEL: v_fshr_i128_svs: 6658; GFX10: ; %bb.0: 6659; GFX10-NEXT: s_movk_i32 s6, 0x7f 6660; GFX10-NEXT: s_mov_b32 s7, 0 6661; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 
1 6662; GFX10-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] 6663; GFX10-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] 6664; GFX10-NEXT: s_lshr_b32 s6, s1, 31 6665; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 6666; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7] 6667; GFX10-NEXT: s_sub_i32 s9, s4, 64 6668; GFX10-NEXT: s_sub_i32 s5, 64, s4 6669; GFX10-NEXT: s_cmp_lt_u32 s4, 64 6670; GFX10-NEXT: v_lshrrev_b64 v[4:5], s8, v[0:1] 6671; GFX10-NEXT: s_cselect_b32 s12, 1, 0 6672; GFX10-NEXT: s_cmp_eq_u32 s4, 0 6673; GFX10-NEXT: s_cselect_b32 s13, 1, 0 6674; GFX10-NEXT: s_lshr_b64 s[6:7], s[0:1], s5 6675; GFX10-NEXT: s_lshl_b64 s[10:11], s[2:3], s4 6676; GFX10-NEXT: s_lshl_b64 s[4:5], s[0:1], s4 6677; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11] 6678; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s9 6679; GFX10-NEXT: s_cmp_lg_u32 s12, 0 6680; GFX10-NEXT: s_cselect_b64 s[4:5], s[4:5], 0 6681; GFX10-NEXT: s_cselect_b64 s[0:1], s[6:7], s[0:1] 6682; GFX10-NEXT: s_cmp_lg_u32 s13, 0 6683; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] 6684; GFX10-NEXT: s_sub_i32 s0, 64, s8 6685; GFX10-NEXT: v_lshlrev_b64 v[6:7], s0, v[2:3] 6686; GFX10-NEXT: s_sub_i32 s0, s8, 64 6687; GFX10-NEXT: s_cmp_lt_u32 s8, 64 6688; GFX10-NEXT: v_lshrrev_b64 v[8:9], s0, v[2:3] 6689; GFX10-NEXT: s_cselect_b32 s1, 1, 0 6690; GFX10-NEXT: s_cmp_eq_u32 s8, 0 6691; GFX10-NEXT: v_or_b32_e32 v4, v4, v6 6692; GFX10-NEXT: s_cselect_b32 s6, 1, 0 6693; GFX10-NEXT: s_and_b32 s0, 1, s1 6694; GFX10-NEXT: v_or_b32_e32 v5, v5, v7 6695; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 6696; GFX10-NEXT: s_and_b32 s0, 1, s6 6697; GFX10-NEXT: s_and_b32 s1, 1, s1 6698; GFX10-NEXT: v_lshrrev_b64 v[2:3], s8, v[2:3] 6699; GFX10-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc_lo 6700; GFX10-NEXT: v_cndmask_b32_e32 v5, v9, v5, vcc_lo 6701; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 6702; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, s1 6703; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo 6704; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo 6705; GFX10-NEXT: v_cndmask_b32_e64 
v2, 0, v2, s0 6706; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, v3, s0 6707; GFX10-NEXT: v_or_b32_e32 v0, s4, v0 6708; GFX10-NEXT: v_or_b32_e32 v1, s5, v1 6709; GFX10-NEXT: v_or_b32_e32 v2, s2, v2 6710; GFX10-NEXT: v_or_b32_e32 v3, s3, v3 6711; GFX10-NEXT: ; return to shader part epilog 6712; 6713; GFX11-LABEL: v_fshr_i128_svs: 6714; GFX11: ; %bb.0: 6715; GFX11-NEXT: s_movk_i32 s6, 0x7f 6716; GFX11-NEXT: s_mov_b32 s7, 0 6717; GFX11-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 6718; GFX11-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] 6719; GFX11-NEXT: s_and_not1_b64 s[4:5], s[6:7], s[4:5] 6720; GFX11-NEXT: s_lshr_b32 s6, s1, 31 6721; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 6722; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7] 6723; GFX11-NEXT: s_sub_i32 s9, s4, 64 6724; GFX11-NEXT: s_sub_i32 s5, 64, s4 6725; GFX11-NEXT: s_cmp_lt_u32 s4, 64 6726; GFX11-NEXT: v_lshrrev_b64 v[4:5], s8, v[0:1] 6727; GFX11-NEXT: s_cselect_b32 s12, 1, 0 6728; GFX11-NEXT: s_cmp_eq_u32 s4, 0 6729; GFX11-NEXT: s_cselect_b32 s13, 1, 0 6730; GFX11-NEXT: s_lshr_b64 s[6:7], s[0:1], s5 6731; GFX11-NEXT: s_lshl_b64 s[10:11], s[2:3], s4 6732; GFX11-NEXT: s_lshl_b64 s[4:5], s[0:1], s4 6733; GFX11-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11] 6734; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s9 6735; GFX11-NEXT: s_cmp_lg_u32 s12, 0 6736; GFX11-NEXT: s_cselect_b64 s[4:5], s[4:5], 0 6737; GFX11-NEXT: s_cselect_b64 s[0:1], s[6:7], s[0:1] 6738; GFX11-NEXT: s_cmp_lg_u32 s13, 0 6739; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] 6740; GFX11-NEXT: s_sub_i32 s0, 64, s8 6741; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 6742; GFX11-NEXT: v_lshlrev_b64 v[6:7], s0, v[2:3] 6743; GFX11-NEXT: s_sub_i32 s0, s8, 64 6744; GFX11-NEXT: s_cmp_lt_u32 s8, 64 6745; GFX11-NEXT: v_lshrrev_b64 v[8:9], s0, v[2:3] 6746; GFX11-NEXT: s_cselect_b32 s1, 1, 0 6747; GFX11-NEXT: s_cmp_eq_u32 s8, 0 6748; GFX11-NEXT: v_or_b32_e32 v4, v4, v6 6749; GFX11-NEXT: s_cselect_b32 s6, 1, 0 6750; GFX11-NEXT: s_and_b32 s0, 1, s1 6751; GFX11-NEXT: v_or_b32_e32 v5, v5, v7 6752; 
GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 6753; GFX11-NEXT: s_and_b32 s0, 1, s6 6754; GFX11-NEXT: s_and_b32 s1, 1, s1 6755; GFX11-NEXT: v_lshrrev_b64 v[2:3], s8, v[2:3] 6756; GFX11-NEXT: v_dual_cndmask_b32 v4, v8, v4 :: v_dual_cndmask_b32 v5, v9, v5 6757; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 6758; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 0, s1 6759; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 6760; GFX11-NEXT: v_dual_cndmask_b32 v0, v4, v0 :: v_dual_cndmask_b32 v1, v5, v1 6761; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v2, s0 6762; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, v3, s0 6763; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4) 6764; GFX11-NEXT: v_or_b32_e32 v0, s4, v0 6765; GFX11-NEXT: v_or_b32_e32 v1, s5, v1 6766; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 6767; GFX11-NEXT: v_or_b32_e32 v2, s2, v2 6768; GFX11-NEXT: v_or_b32_e32 v3, s3, v3 6769; GFX11-NEXT: ; return to shader part epilog 6770 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt) 6771 %cast.result = bitcast i128 %result to <4 x float> 6772 ret <4 x float> %cast.result 6773} 6774 6775define amdgpu_ps <4 x float> @v_fshr_i128_vss(i128 %lhs, i128 inreg %rhs, i128 inreg %amt) { 6776; GFX6-LABEL: v_fshr_i128_vss: 6777; GFX6: ; %bb.0: 6778; GFX6-NEXT: s_mov_b64 s[6:7], 0x7f 6779; GFX6-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] 6780; GFX6-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] 6781; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 6782; GFX6-NEXT: s_sub_i32 s5, s4, 64 6783; GFX6-NEXT: s_sub_i32 s6, 64, s4 6784; GFX6-NEXT: v_lshl_b64 v[4:5], v[0:1], 1 6785; GFX6-NEXT: v_lshrrev_b32_e32 v0, 31, v1 6786; GFX6-NEXT: s_cmp_lt_u32 s4, 64 6787; GFX6-NEXT: v_or_b32_e32 v2, v2, v0 6788; GFX6-NEXT: s_cselect_b32 s7, 1, 0 6789; GFX6-NEXT: s_cmp_eq_u32 s4, 0 6790; GFX6-NEXT: s_cselect_b32 s9, 1, 0 6791; GFX6-NEXT: v_lshr_b64 v[0:1], v[4:5], s6 6792; GFX6-NEXT: v_lshl_b64 v[6:7], v[2:3], s4 6793; GFX6-NEXT: 
v_lshl_b64 v[8:9], v[4:5], s4 6794; GFX6-NEXT: s_and_b32 s4, 1, s7 6795; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 6796; GFX6-NEXT: s_and_b32 s4, 1, s9 6797; GFX6-NEXT: s_sub_i32 s10, s8, 64 6798; GFX6-NEXT: s_sub_i32 s9, 64, s8 6799; GFX6-NEXT: v_or_b32_e32 v6, v0, v6 6800; GFX6-NEXT: v_or_b32_e32 v7, v1, v7 6801; GFX6-NEXT: v_lshl_b64 v[0:1], v[4:5], s5 6802; GFX6-NEXT: s_cmp_lt_u32 s8, 64 6803; GFX6-NEXT: s_cselect_b32 s11, 1, 0 6804; GFX6-NEXT: s_cmp_eq_u32 s8, 0 6805; GFX6-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc 6806; GFX6-NEXT: v_cndmask_b32_e32 v5, 0, v9, vcc 6807; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 6808; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 6809; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 6810; GFX6-NEXT: s_cselect_b32 s12, 1, 0 6811; GFX6-NEXT: s_lshr_b64 s[4:5], s[2:3], s8 6812; GFX6-NEXT: s_lshr_b64 s[6:7], s[0:1], s8 6813; GFX6-NEXT: s_lshl_b64 s[8:9], s[2:3], s9 6814; GFX6-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] 6815; GFX6-NEXT: s_lshr_b64 s[2:3], s[2:3], s10 6816; GFX6-NEXT: s_cmp_lg_u32 s11, 0 6817; GFX6-NEXT: s_cselect_b64 s[2:3], s[6:7], s[2:3] 6818; GFX6-NEXT: s_cmp_lg_u32 s12, 0 6819; GFX6-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] 6820; GFX6-NEXT: s_cmp_lg_u32 s11, 0 6821; GFX6-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc 6822; GFX6-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc 6823; GFX6-NEXT: s_cselect_b64 s[2:3], s[4:5], 0 6824; GFX6-NEXT: v_or_b32_e32 v0, s0, v4 6825; GFX6-NEXT: v_or_b32_e32 v1, s1, v5 6826; GFX6-NEXT: v_or_b32_e32 v2, s2, v2 6827; GFX6-NEXT: v_or_b32_e32 v3, s3, v3 6828; GFX6-NEXT: ; return to shader part epilog 6829; 6830; GFX8-LABEL: v_fshr_i128_vss: 6831; GFX8: ; %bb.0: 6832; GFX8-NEXT: s_mov_b64 s[6:7], 0x7f 6833; GFX8-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] 6834; GFX8-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] 6835; GFX8-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 6836; GFX8-NEXT: s_sub_i32 s5, s4, 64 6837; GFX8-NEXT: s_sub_i32 s6, 64, s4 6838; GFX8-NEXT: v_lshlrev_b64 v[4:5], 1, v[0:1] 6839; GFX8-NEXT: v_lshrrev_b32_e32 v0, 31, 
v1 6840; GFX8-NEXT: s_cmp_lt_u32 s4, 64 6841; GFX8-NEXT: v_or_b32_e32 v2, v2, v0 6842; GFX8-NEXT: s_cselect_b32 s7, 1, 0 6843; GFX8-NEXT: s_cmp_eq_u32 s4, 0 6844; GFX8-NEXT: s_cselect_b32 s9, 1, 0 6845; GFX8-NEXT: v_lshrrev_b64 v[0:1], s6, v[4:5] 6846; GFX8-NEXT: v_lshlrev_b64 v[6:7], s4, v[2:3] 6847; GFX8-NEXT: v_lshlrev_b64 v[8:9], s4, v[4:5] 6848; GFX8-NEXT: s_and_b32 s4, 1, s7 6849; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 6850; GFX8-NEXT: s_and_b32 s4, 1, s9 6851; GFX8-NEXT: s_sub_i32 s10, s8, 64 6852; GFX8-NEXT: s_sub_i32 s9, 64, s8 6853; GFX8-NEXT: v_or_b32_e32 v6, v0, v6 6854; GFX8-NEXT: v_or_b32_e32 v7, v1, v7 6855; GFX8-NEXT: v_lshlrev_b64 v[0:1], s5, v[4:5] 6856; GFX8-NEXT: s_cmp_lt_u32 s8, 64 6857; GFX8-NEXT: s_cselect_b32 s11, 1, 0 6858; GFX8-NEXT: s_cmp_eq_u32 s8, 0 6859; GFX8-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc 6860; GFX8-NEXT: v_cndmask_b32_e32 v5, 0, v9, vcc 6861; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 6862; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 6863; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 6864; GFX8-NEXT: s_cselect_b32 s12, 1, 0 6865; GFX8-NEXT: s_lshr_b64 s[4:5], s[2:3], s8 6866; GFX8-NEXT: s_lshr_b64 s[6:7], s[0:1], s8 6867; GFX8-NEXT: s_lshl_b64 s[8:9], s[2:3], s9 6868; GFX8-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] 6869; GFX8-NEXT: s_lshr_b64 s[2:3], s[2:3], s10 6870; GFX8-NEXT: s_cmp_lg_u32 s11, 0 6871; GFX8-NEXT: s_cselect_b64 s[2:3], s[6:7], s[2:3] 6872; GFX8-NEXT: s_cmp_lg_u32 s12, 0 6873; GFX8-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] 6874; GFX8-NEXT: s_cmp_lg_u32 s11, 0 6875; GFX8-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc 6876; GFX8-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc 6877; GFX8-NEXT: s_cselect_b64 s[2:3], s[4:5], 0 6878; GFX8-NEXT: v_or_b32_e32 v0, s0, v4 6879; GFX8-NEXT: v_or_b32_e32 v1, s1, v5 6880; GFX8-NEXT: v_or_b32_e32 v2, s2, v2 6881; GFX8-NEXT: v_or_b32_e32 v3, s3, v3 6882; GFX8-NEXT: ; return to shader part epilog 6883; 6884; GFX9-LABEL: v_fshr_i128_vss: 6885; GFX9: ; %bb.0: 6886; GFX9-NEXT: s_mov_b64 s[6:7], 0x7f 
6887; GFX9-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] 6888; GFX9-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] 6889; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 6890; GFX9-NEXT: s_sub_i32 s5, s4, 64 6891; GFX9-NEXT: s_sub_i32 s6, 64, s4 6892; GFX9-NEXT: v_lshlrev_b64 v[4:5], 1, v[0:1] 6893; GFX9-NEXT: v_lshrrev_b32_e32 v0, 31, v1 6894; GFX9-NEXT: s_cmp_lt_u32 s4, 64 6895; GFX9-NEXT: v_or_b32_e32 v2, v2, v0 6896; GFX9-NEXT: s_cselect_b32 s7, 1, 0 6897; GFX9-NEXT: s_cmp_eq_u32 s4, 0 6898; GFX9-NEXT: s_cselect_b32 s9, 1, 0 6899; GFX9-NEXT: v_lshrrev_b64 v[0:1], s6, v[4:5] 6900; GFX9-NEXT: v_lshlrev_b64 v[6:7], s4, v[2:3] 6901; GFX9-NEXT: v_lshlrev_b64 v[8:9], s4, v[4:5] 6902; GFX9-NEXT: s_and_b32 s4, 1, s7 6903; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 6904; GFX9-NEXT: s_and_b32 s4, 1, s9 6905; GFX9-NEXT: s_sub_i32 s10, s8, 64 6906; GFX9-NEXT: s_sub_i32 s9, 64, s8 6907; GFX9-NEXT: v_or_b32_e32 v6, v0, v6 6908; GFX9-NEXT: v_or_b32_e32 v7, v1, v7 6909; GFX9-NEXT: v_lshlrev_b64 v[0:1], s5, v[4:5] 6910; GFX9-NEXT: s_cmp_lt_u32 s8, 64 6911; GFX9-NEXT: s_cselect_b32 s11, 1, 0 6912; GFX9-NEXT: s_cmp_eq_u32 s8, 0 6913; GFX9-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc 6914; GFX9-NEXT: v_cndmask_b32_e32 v5, 0, v9, vcc 6915; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 6916; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 6917; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 6918; GFX9-NEXT: s_cselect_b32 s12, 1, 0 6919; GFX9-NEXT: s_lshr_b64 s[4:5], s[2:3], s8 6920; GFX9-NEXT: s_lshr_b64 s[6:7], s[0:1], s8 6921; GFX9-NEXT: s_lshl_b64 s[8:9], s[2:3], s9 6922; GFX9-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] 6923; GFX9-NEXT: s_lshr_b64 s[2:3], s[2:3], s10 6924; GFX9-NEXT: s_cmp_lg_u32 s11, 0 6925; GFX9-NEXT: s_cselect_b64 s[2:3], s[6:7], s[2:3] 6926; GFX9-NEXT: s_cmp_lg_u32 s12, 0 6927; GFX9-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] 6928; GFX9-NEXT: s_cmp_lg_u32 s11, 0 6929; GFX9-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc 6930; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc 6931; GFX9-NEXT: s_cselect_b64 s[2:3], 
s[4:5], 0 6932; GFX9-NEXT: v_or_b32_e32 v0, s0, v4 6933; GFX9-NEXT: v_or_b32_e32 v1, s1, v5 6934; GFX9-NEXT: v_or_b32_e32 v2, s2, v2 6935; GFX9-NEXT: v_or_b32_e32 v3, s3, v3 6936; GFX9-NEXT: ; return to shader part epilog 6937; 6938; GFX10-LABEL: v_fshr_i128_vss: 6939; GFX10: ; %bb.0: 6940; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 6941; GFX10-NEXT: v_lshrrev_b32_e32 v4, 31, v1 6942; GFX10-NEXT: s_mov_b64 s[6:7], 0x7f 6943; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 6944; GFX10-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] 6945; GFX10-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] 6946; GFX10-NEXT: v_or_b32_e32 v2, v2, v4 6947; GFX10-NEXT: s_sub_i32 s6, 64, s4 6948; GFX10-NEXT: s_sub_i32 s5, s4, 64 6949; GFX10-NEXT: s_cmp_lt_u32 s4, 64 6950; GFX10-NEXT: v_lshrrev_b64 v[4:5], s6, v[0:1] 6951; GFX10-NEXT: v_lshlrev_b64 v[6:7], s4, v[2:3] 6952; GFX10-NEXT: s_cselect_b32 s7, 1, 0 6953; GFX10-NEXT: s_cmp_eq_u32 s4, 0 6954; GFX10-NEXT: v_lshlrev_b64 v[8:9], s4, v[0:1] 6955; GFX10-NEXT: s_cselect_b32 s9, 1, 0 6956; GFX10-NEXT: s_and_b32 s4, 1, s7 6957; GFX10-NEXT: v_lshlrev_b64 v[0:1], s5, v[0:1] 6958; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4 6959; GFX10-NEXT: v_or_b32_e32 v4, v4, v6 6960; GFX10-NEXT: v_or_b32_e32 v5, v5, v7 6961; GFX10-NEXT: s_and_b32 s4, 1, s9 6962; GFX10-NEXT: s_sub_i32 s10, s8, 64 6963; GFX10-NEXT: s_sub_i32 s6, 64, s8 6964; GFX10-NEXT: s_cmp_lt_u32 s8, 64 6965; GFX10-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc_lo 6966; GFX10-NEXT: s_cselect_b32 s11, 1, 0 6967; GFX10-NEXT: s_cmp_eq_u32 s8, 0 6968; GFX10-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc_lo 6969; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 6970; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo 6971; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4 6972; GFX10-NEXT: s_cselect_b32 s12, 1, 0 6973; GFX10-NEXT: s_lshr_b64 s[4:5], s[0:1], s8 6974; GFX10-NEXT: s_lshl_b64 s[6:7], s[2:3], s6 6975; GFX10-NEXT: s_lshr_b64 s[8:9], s[2:3], s8 6976; GFX10-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] 6977; GFX10-NEXT: s_lshr_b64 
s[2:3], s[2:3], s10 6978; GFX10-NEXT: s_cmp_lg_u32 s11, 0 6979; GFX10-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc_lo 6980; GFX10-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3] 6981; GFX10-NEXT: s_cmp_lg_u32 s12, 0 6982; GFX10-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc_lo 6983; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] 6984; GFX10-NEXT: s_cmp_lg_u32 s11, 0 6985; GFX10-NEXT: v_or_b32_e32 v0, s0, v6 6986; GFX10-NEXT: s_cselect_b64 s[2:3], s[8:9], 0 6987; GFX10-NEXT: v_or_b32_e32 v1, s1, v7 6988; GFX10-NEXT: v_or_b32_e32 v2, s2, v2 6989; GFX10-NEXT: v_or_b32_e32 v3, s3, v3 6990; GFX10-NEXT: ; return to shader part epilog 6991; 6992; GFX11-LABEL: v_fshr_i128_vss: 6993; GFX11: ; %bb.0: 6994; GFX11-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 6995; GFX11-NEXT: v_lshrrev_b32_e32 v4, 31, v1 6996; GFX11-NEXT: s_mov_b64 s[6:7], 0x7f 6997; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 6998; GFX11-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] 6999; GFX11-NEXT: s_and_not1_b64 s[4:5], s[6:7], s[4:5] 7000; GFX11-NEXT: v_or_b32_e32 v2, v2, v4 7001; GFX11-NEXT: s_sub_i32 s6, 64, s4 7002; GFX11-NEXT: s_sub_i32 s5, s4, 64 7003; GFX11-NEXT: s_cmp_lt_u32 s4, 64 7004; GFX11-NEXT: v_lshrrev_b64 v[4:5], s6, v[0:1] 7005; GFX11-NEXT: v_lshlrev_b64 v[6:7], s4, v[2:3] 7006; GFX11-NEXT: s_cselect_b32 s7, 1, 0 7007; GFX11-NEXT: s_cmp_eq_u32 s4, 0 7008; GFX11-NEXT: v_lshlrev_b64 v[8:9], s4, v[0:1] 7009; GFX11-NEXT: s_cselect_b32 s9, 1, 0 7010; GFX11-NEXT: s_and_b32 s4, 1, s7 7011; GFX11-NEXT: v_lshlrev_b64 v[0:1], s5, v[0:1] 7012; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4 7013; GFX11-NEXT: v_or_b32_e32 v4, v4, v6 7014; GFX11-NEXT: v_or_b32_e32 v5, v5, v7 7015; GFX11-NEXT: s_and_b32 s4, 1, s9 7016; GFX11-NEXT: s_sub_i32 s10, s8, 64 7017; GFX11-NEXT: s_sub_i32 s6, 64, s8 7018; GFX11-NEXT: s_cmp_lt_u32 s8, 64 7019; GFX11-NEXT: v_dual_cndmask_b32 v6, 0, v8 :: v_dual_cndmask_b32 v7, 0, v9 7020; GFX11-NEXT: s_cselect_b32 s11, 1, 0 7021; GFX11-NEXT: s_cmp_eq_u32 s8, 0 7022; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v4 :: 
v_dual_cndmask_b32 v1, v1, v5 7023; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4 7024; GFX11-NEXT: s_cselect_b32 s12, 1, 0 7025; GFX11-NEXT: s_lshr_b64 s[4:5], s[0:1], s8 7026; GFX11-NEXT: s_lshl_b64 s[6:7], s[2:3], s6 7027; GFX11-NEXT: s_lshr_b64 s[8:9], s[2:3], s8 7028; GFX11-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] 7029; GFX11-NEXT: s_lshr_b64 s[2:3], s[2:3], s10 7030; GFX11-NEXT: s_cmp_lg_u32 s11, 0 7031; GFX11-NEXT: v_dual_cndmask_b32 v2, v0, v2 :: v_dual_cndmask_b32 v3, v1, v3 7032; GFX11-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3] 7033; GFX11-NEXT: s_cmp_lg_u32 s12, 0 7034; GFX11-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] 7035; GFX11-NEXT: s_cmp_lg_u32 s11, 0 7036; GFX11-NEXT: v_or_b32_e32 v0, s0, v6 7037; GFX11-NEXT: s_cselect_b64 s[2:3], s[8:9], 0 7038; GFX11-NEXT: v_or_b32_e32 v1, s1, v7 7039; GFX11-NEXT: v_or_b32_e32 v2, s2, v2 7040; GFX11-NEXT: v_or_b32_e32 v3, s3, v3 7041; GFX11-NEXT: ; return to shader part epilog 7042 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt) 7043 %cast.result = bitcast i128 %result to <4 x float> 7044 ret <4 x float> %cast.result 7045} 7046 7047define amdgpu_ps i128 @s_fshr_i128_65(i128 inreg %lhs, i128 inreg %rhs) { 7048; GFX6-LABEL: s_fshr_i128_65: 7049; GFX6: ; %bb.0: 7050; GFX6-NEXT: s_mov_b32 s4, 0 7051; GFX6-NEXT: s_lshl_b32 s5, s0, 31 7052; GFX6-NEXT: s_lshl_b32 s3, s2, 31 7053; GFX6-NEXT: s_mov_b32 s2, s4 7054; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], 1 7055; GFX6-NEXT: s_or_b64 s[2:3], s[2:3], s[0:1] 7056; GFX6-NEXT: s_lshr_b64 s[0:1], s[6:7], 1 7057; GFX6-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1] 7058; GFX6-NEXT: ; return to shader part epilog 7059; 7060; GFX8-LABEL: s_fshr_i128_65: 7061; GFX8: ; %bb.0: 7062; GFX8-NEXT: s_mov_b32 s4, 0 7063; GFX8-NEXT: s_lshl_b32 s5, s0, 31 7064; GFX8-NEXT: s_lshl_b32 s3, s2, 31 7065; GFX8-NEXT: s_mov_b32 s2, s4 7066; GFX8-NEXT: s_lshr_b64 s[0:1], s[0:1], 1 7067; GFX8-NEXT: s_or_b64 s[2:3], s[2:3], s[0:1] 7068; GFX8-NEXT: s_lshr_b64 s[0:1], s[6:7], 1 7069; GFX8-NEXT: s_or_b64 
s[0:1], s[4:5], s[0:1] 7070; GFX8-NEXT: ; return to shader part epilog 7071; 7072; GFX9-LABEL: s_fshr_i128_65: 7073; GFX9: ; %bb.0: 7074; GFX9-NEXT: s_mov_b32 s4, 0 7075; GFX9-NEXT: s_lshl_b32 s5, s0, 31 7076; GFX9-NEXT: s_lshl_b32 s3, s2, 31 7077; GFX9-NEXT: s_mov_b32 s2, s4 7078; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], 1 7079; GFX9-NEXT: s_or_b64 s[2:3], s[2:3], s[0:1] 7080; GFX9-NEXT: s_lshr_b64 s[0:1], s[6:7], 1 7081; GFX9-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1] 7082; GFX9-NEXT: ; return to shader part epilog 7083; 7084; GFX10-LABEL: s_fshr_i128_65: 7085; GFX10: ; %bb.0: 7086; GFX10-NEXT: s_mov_b32 s4, 0 7087; GFX10-NEXT: s_lshl_b32 s5, s0, 31 7088; GFX10-NEXT: s_lshl_b32 s3, s2, 31 7089; GFX10-NEXT: s_mov_b32 s2, s4 7090; GFX10-NEXT: s_lshr_b64 s[6:7], s[6:7], 1 7091; GFX10-NEXT: s_lshr_b64 s[8:9], s[0:1], 1 7092; GFX10-NEXT: s_or_b64 s[0:1], s[4:5], s[6:7] 7093; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9] 7094; GFX10-NEXT: ; return to shader part epilog 7095; 7096; GFX11-LABEL: s_fshr_i128_65: 7097; GFX11: ; %bb.0: 7098; GFX11-NEXT: s_mov_b32 s4, 0 7099; GFX11-NEXT: s_lshl_b32 s5, s0, 31 7100; GFX11-NEXT: s_lshl_b32 s3, s2, 31 7101; GFX11-NEXT: s_mov_b32 s2, s4 7102; GFX11-NEXT: s_lshr_b64 s[6:7], s[6:7], 1 7103; GFX11-NEXT: s_lshr_b64 s[8:9], s[0:1], 1 7104; GFX11-NEXT: s_or_b64 s[0:1], s[4:5], s[6:7] 7105; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9] 7106; GFX11-NEXT: ; return to shader part epilog 7107 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 65) 7108 ret i128 %result 7109} 7110 7111define i128 @v_fshr_i128_65(i128 %lhs, i128 %rhs) { 7112; GFX6-LABEL: v_fshr_i128_65: 7113; GFX6: ; %bb.0: 7114; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7115; GFX6-NEXT: v_lshlrev_b32_e32 v4, 31, v0 7116; GFX6-NEXT: v_lshlrev_b32_e32 v5, 31, v2 7117; GFX6-NEXT: v_lshr_b64 v[2:3], v[0:1], 1 7118; GFX6-NEXT: v_lshr_b64 v[0:1], v[6:7], 1 7119; GFX6-NEXT: v_or_b32_e32 v3, v5, v3 7120; GFX6-NEXT: v_or_b32_e32 v1, v4, v1 7121; GFX6-NEXT: s_setpc_b64 
s[30:31] 7122; 7123; GFX8-LABEL: v_fshr_i128_65: 7124; GFX8: ; %bb.0: 7125; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7126; GFX8-NEXT: v_lshlrev_b32_e32 v4, 31, v0 7127; GFX8-NEXT: v_lshlrev_b32_e32 v5, 31, v2 7128; GFX8-NEXT: v_lshrrev_b64 v[2:3], 1, v[0:1] 7129; GFX8-NEXT: v_lshrrev_b64 v[0:1], 1, v[6:7] 7130; GFX8-NEXT: v_or_b32_e32 v3, v5, v3 7131; GFX8-NEXT: v_or_b32_e32 v1, v4, v1 7132; GFX8-NEXT: s_setpc_b64 s[30:31] 7133; 7134; GFX9-LABEL: v_fshr_i128_65: 7135; GFX9: ; %bb.0: 7136; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7137; GFX9-NEXT: v_lshlrev_b32_e32 v4, 31, v0 7138; GFX9-NEXT: v_lshlrev_b32_e32 v5, 31, v2 7139; GFX9-NEXT: v_lshrrev_b64 v[2:3], 1, v[0:1] 7140; GFX9-NEXT: v_lshrrev_b64 v[0:1], 1, v[6:7] 7141; GFX9-NEXT: v_or_b32_e32 v3, v5, v3 7142; GFX9-NEXT: v_or_b32_e32 v1, v4, v1 7143; GFX9-NEXT: s_setpc_b64 s[30:31] 7144; 7145; GFX10-LABEL: v_fshr_i128_65: 7146; GFX10: ; %bb.0: 7147; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7148; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 7149; GFX10-NEXT: v_mov_b32_e32 v8, v2 7150; GFX10-NEXT: v_lshrrev_b64 v[4:5], 1, v[6:7] 7151; GFX10-NEXT: v_lshrrev_b64 v[2:3], 1, v[0:1] 7152; GFX10-NEXT: v_lshlrev_b32_e32 v9, 31, v0 7153; GFX10-NEXT: v_lshlrev_b32_e32 v0, 31, v8 7154; GFX10-NEXT: v_or_b32_e32 v1, v9, v5 7155; GFX10-NEXT: v_or_b32_e32 v3, v0, v3 7156; GFX10-NEXT: v_mov_b32_e32 v0, v4 7157; GFX10-NEXT: s_setpc_b64 s[30:31] 7158; 7159; GFX11-LABEL: v_fshr_i128_65: 7160; GFX11: ; %bb.0: 7161; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7162; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 7163; GFX11-NEXT: v_dual_mov_b32 v8, v2 :: v_dual_lshlrev_b32 v9, 31, v0 7164; GFX11-NEXT: v_lshrrev_b64 v[4:5], 1, v[6:7] 7165; GFX11-NEXT: v_lshrrev_b64 v[2:3], 1, v[0:1] 7166; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 7167; GFX11-NEXT: v_lshlrev_b32_e32 v0, 31, v8 7168; GFX11-NEXT: v_or_b32_e32 v1, v9, v5 7169; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 7170; 
GFX11-NEXT: v_or_b32_e32 v3, v0, v3 7171; GFX11-NEXT: v_mov_b32_e32 v0, v4 7172; GFX11-NEXT: s_setpc_b64 s[30:31] 7173 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 65) 7174 ret i128 %result 7175} 7176; s_fshr_v2i128 - llvm.fshr on <2 x i128> inside an amdgpu_ps function where lhs, rhs and the variable shift amount are all passed inreg. The per-target check lines that follow are autogenerated (see the note on the first line of this file) - regenerate them with utils/update_llc_test_checks.py instead of editing them by hand. 7177define amdgpu_ps <2 x i128> @s_fshr_v2i128(<2 x i128> inreg %lhs, <2 x i128> inreg %rhs, <2 x i128> inreg %amt) { 7178; GFX6-LABEL: s_fshr_v2i128: 7179; GFX6: ; %bb.0: 7180; GFX6-NEXT: s_movk_i32 s18, 0x7f 7181; GFX6-NEXT: s_mov_b32 s19, 0 7182; GFX6-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19] 7183; GFX6-NEXT: s_andn2_b64 s[16:17], s[18:19], s[16:17] 7184; GFX6-NEXT: s_lshl_b64 s[24:25], s[0:1], 1 7185; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 7186; GFX6-NEXT: s_lshr_b32 s0, s1, 31 7187; GFX6-NEXT: s_mov_b32 s1, s19 7188; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] 7189; GFX6-NEXT: s_sub_i32 s23, s16, 64 7190; GFX6-NEXT: s_sub_i32 s17, 64, s16 7191; GFX6-NEXT: s_cmp_lt_u32 s16, 64 7192; GFX6-NEXT: s_cselect_b32 s28, 1, 0 7193; GFX6-NEXT: s_cmp_eq_u32 s16, 0 7194; GFX6-NEXT: s_cselect_b32 s29, 1, 0 7195; GFX6-NEXT: s_lshl_b64 s[2:3], s[24:25], s16 7196; GFX6-NEXT: s_lshr_b64 s[26:27], s[24:25], s17 7197; GFX6-NEXT: s_lshl_b64 s[16:17], s[0:1], s16 7198; GFX6-NEXT: s_or_b64 s[16:17], s[26:27], s[16:17] 7199; GFX6-NEXT: s_lshl_b64 s[24:25], s[24:25], s23 7200; GFX6-NEXT: s_cmp_lg_u32 s28, 0 7201; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 7202; GFX6-NEXT: s_cselect_b64 s[16:17], s[16:17], s[24:25] 7203; GFX6-NEXT: s_cmp_lg_u32 s29, 0 7204; GFX6-NEXT: s_cselect_b64 s[16:17], s[0:1], s[16:17] 7205; GFX6-NEXT: s_sub_i32 s26, s22, 64 7206; GFX6-NEXT: s_sub_i32 s24, 64, s22 7207; GFX6-NEXT: s_cmp_lt_u32 s22, 64 7208; GFX6-NEXT: s_cselect_b32 s27, 1, 0 7209; GFX6-NEXT: s_cmp_eq_u32 s22, 0 7210; GFX6-NEXT: s_cselect_b32 s28, 1, 0 7211; GFX6-NEXT: s_lshr_b64 s[0:1], s[10:11], s22 7212; GFX6-NEXT: s_lshr_b64 s[22:23], s[8:9], s22 7213; GFX6-NEXT: s_lshl_b64 s[24:25], s[10:11], s24 7214; GFX6-NEXT: s_or_b64 s[22:23], s[22:23], s[24:25] 7215; GFX6-NEXT:
s_lshr_b64 s[10:11], s[10:11], s26 7216; GFX6-NEXT: s_cmp_lg_u32 s27, 0 7217; GFX6-NEXT: s_cselect_b64 s[10:11], s[22:23], s[10:11] 7218; GFX6-NEXT: s_cmp_lg_u32 s28, 0 7219; GFX6-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11] 7220; GFX6-NEXT: s_cmp_lg_u32 s27, 0 7221; GFX6-NEXT: s_cselect_b64 s[10:11], s[0:1], 0 7222; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9] 7223; GFX6-NEXT: s_or_b64 s[2:3], s[16:17], s[10:11] 7224; GFX6-NEXT: s_and_b64 s[8:9], s[20:21], s[18:19] 7225; GFX6-NEXT: s_andn2_b64 s[10:11], s[18:19], s[20:21] 7226; GFX6-NEXT: s_lshl_b64 s[6:7], s[6:7], 1 7227; GFX6-NEXT: s_lshr_b32 s18, s5, 31 7228; GFX6-NEXT: s_lshl_b64 s[16:17], s[4:5], 1 7229; GFX6-NEXT: s_or_b64 s[4:5], s[6:7], s[18:19] 7230; GFX6-NEXT: s_sub_i32 s9, s10, 64 7231; GFX6-NEXT: s_sub_i32 s11, 64, s10 7232; GFX6-NEXT: s_cmp_lt_u32 s10, 64 7233; GFX6-NEXT: s_cselect_b32 s20, 1, 0 7234; GFX6-NEXT: s_cmp_eq_u32 s10, 0 7235; GFX6-NEXT: s_cselect_b32 s21, 1, 0 7236; GFX6-NEXT: s_lshl_b64 s[6:7], s[16:17], s10 7237; GFX6-NEXT: s_lshr_b64 s[18:19], s[16:17], s11 7238; GFX6-NEXT: s_lshl_b64 s[10:11], s[4:5], s10 7239; GFX6-NEXT: s_or_b64 s[10:11], s[18:19], s[10:11] 7240; GFX6-NEXT: s_lshl_b64 s[16:17], s[16:17], s9 7241; GFX6-NEXT: s_cmp_lg_u32 s20, 0 7242; GFX6-NEXT: s_cselect_b64 s[6:7], s[6:7], 0 7243; GFX6-NEXT: s_cselect_b64 s[10:11], s[10:11], s[16:17] 7244; GFX6-NEXT: s_cmp_lg_u32 s21, 0 7245; GFX6-NEXT: s_cselect_b64 s[10:11], s[4:5], s[10:11] 7246; GFX6-NEXT: s_sub_i32 s18, s8, 64 7247; GFX6-NEXT: s_sub_i32 s16, 64, s8 7248; GFX6-NEXT: s_cmp_lt_u32 s8, 64 7249; GFX6-NEXT: s_cselect_b32 s19, 1, 0 7250; GFX6-NEXT: s_cmp_eq_u32 s8, 0 7251; GFX6-NEXT: s_cselect_b32 s20, 1, 0 7252; GFX6-NEXT: s_lshr_b64 s[4:5], s[14:15], s8 7253; GFX6-NEXT: s_lshr_b64 s[8:9], s[12:13], s8 7254; GFX6-NEXT: s_lshl_b64 s[16:17], s[14:15], s16 7255; GFX6-NEXT: s_or_b64 s[8:9], s[8:9], s[16:17] 7256; GFX6-NEXT: s_lshr_b64 s[14:15], s[14:15], s18 7257; GFX6-NEXT: s_cmp_lg_u32 s19, 0 7258; GFX6-NEXT:
s_cselect_b64 s[8:9], s[8:9], s[14:15] 7259; GFX6-NEXT: s_cmp_lg_u32 s20, 0 7260; GFX6-NEXT: s_cselect_b64 s[8:9], s[12:13], s[8:9] 7261; GFX6-NEXT: s_cmp_lg_u32 s19, 0 7262; GFX6-NEXT: s_cselect_b64 s[12:13], s[4:5], 0 7263; GFX6-NEXT: s_or_b64 s[4:5], s[6:7], s[8:9] 7264; GFX6-NEXT: s_or_b64 s[6:7], s[10:11], s[12:13] 7265; GFX6-NEXT: ; return to shader part epilog 7266; 7267; GFX8-LABEL: s_fshr_v2i128: 7268; GFX8: ; %bb.0: 7269; GFX8-NEXT: s_movk_i32 s18, 0x7f 7270; GFX8-NEXT: s_mov_b32 s19, 0 7271; GFX8-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19] 7272; GFX8-NEXT: s_andn2_b64 s[16:17], s[18:19], s[16:17] 7273; GFX8-NEXT: s_lshl_b64 s[24:25], s[0:1], 1 7274; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 7275; GFX8-NEXT: s_lshr_b32 s0, s1, 31 7276; GFX8-NEXT: s_mov_b32 s1, s19 7277; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] 7278; GFX8-NEXT: s_sub_i32 s23, s16, 64 7279; GFX8-NEXT: s_sub_i32 s17, 64, s16 7280; GFX8-NEXT: s_cmp_lt_u32 s16, 64 7281; GFX8-NEXT: s_cselect_b32 s28, 1, 0 7282; GFX8-NEXT: s_cmp_eq_u32 s16, 0 7283; GFX8-NEXT: s_cselect_b32 s29, 1, 0 7284; GFX8-NEXT: s_lshl_b64 s[2:3], s[24:25], s16 7285; GFX8-NEXT: s_lshr_b64 s[26:27], s[24:25], s17 7286; GFX8-NEXT: s_lshl_b64 s[16:17], s[0:1], s16 7287; GFX8-NEXT: s_or_b64 s[16:17], s[26:27], s[16:17] 7288; GFX8-NEXT: s_lshl_b64 s[24:25], s[24:25], s23 7289; GFX8-NEXT: s_cmp_lg_u32 s28, 0 7290; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 7291; GFX8-NEXT: s_cselect_b64 s[16:17], s[16:17], s[24:25] 7292; GFX8-NEXT: s_cmp_lg_u32 s29, 0 7293; GFX8-NEXT: s_cselect_b64 s[16:17], s[0:1], s[16:17] 7294; GFX8-NEXT: s_sub_i32 s26, s22, 64 7295; GFX8-NEXT: s_sub_i32 s24, 64, s22 7296; GFX8-NEXT: s_cmp_lt_u32 s22, 64 7297; GFX8-NEXT: s_cselect_b32 s27, 1, 0 7298; GFX8-NEXT: s_cmp_eq_u32 s22, 0 7299; GFX8-NEXT: s_cselect_b32 s28, 1, 0 7300; GFX8-NEXT: s_lshr_b64 s[0:1], s[10:11], s22 7301; GFX8-NEXT: s_lshr_b64 s[22:23], s[8:9], s22 7302; GFX8-NEXT: s_lshl_b64 s[24:25], s[10:11], s24 7303; GFX8-NEXT: s_or_b64 s[22:23],
s[22:23], s[24:25] 7304; GFX8-NEXT: s_lshr_b64 s[10:11], s[10:11], s26 7305; GFX8-NEXT: s_cmp_lg_u32 s27, 0 7306; GFX8-NEXT: s_cselect_b64 s[10:11], s[22:23], s[10:11] 7307; GFX8-NEXT: s_cmp_lg_u32 s28, 0 7308; GFX8-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11] 7309; GFX8-NEXT: s_cmp_lg_u32 s27, 0 7310; GFX8-NEXT: s_cselect_b64 s[10:11], s[0:1], 0 7311; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9] 7312; GFX8-NEXT: s_or_b64 s[2:3], s[16:17], s[10:11] 7313; GFX8-NEXT: s_and_b64 s[8:9], s[20:21], s[18:19] 7314; GFX8-NEXT: s_andn2_b64 s[10:11], s[18:19], s[20:21] 7315; GFX8-NEXT: s_lshl_b64 s[6:7], s[6:7], 1 7316; GFX8-NEXT: s_lshr_b32 s18, s5, 31 7317; GFX8-NEXT: s_lshl_b64 s[16:17], s[4:5], 1 7318; GFX8-NEXT: s_or_b64 s[4:5], s[6:7], s[18:19] 7319; GFX8-NEXT: s_sub_i32 s9, s10, 64 7320; GFX8-NEXT: s_sub_i32 s11, 64, s10 7321; GFX8-NEXT: s_cmp_lt_u32 s10, 64 7322; GFX8-NEXT: s_cselect_b32 s20, 1, 0 7323; GFX8-NEXT: s_cmp_eq_u32 s10, 0 7324; GFX8-NEXT: s_cselect_b32 s21, 1, 0 7325; GFX8-NEXT: s_lshl_b64 s[6:7], s[16:17], s10 7326; GFX8-NEXT: s_lshr_b64 s[18:19], s[16:17], s11 7327; GFX8-NEXT: s_lshl_b64 s[10:11], s[4:5], s10 7328; GFX8-NEXT: s_or_b64 s[10:11], s[18:19], s[10:11] 7329; GFX8-NEXT: s_lshl_b64 s[16:17], s[16:17], s9 7330; GFX8-NEXT: s_cmp_lg_u32 s20, 0 7331; GFX8-NEXT: s_cselect_b64 s[6:7], s[6:7], 0 7332; GFX8-NEXT: s_cselect_b64 s[10:11], s[10:11], s[16:17] 7333; GFX8-NEXT: s_cmp_lg_u32 s21, 0 7334; GFX8-NEXT: s_cselect_b64 s[10:11], s[4:5], s[10:11] 7335; GFX8-NEXT: s_sub_i32 s18, s8, 64 7336; GFX8-NEXT: s_sub_i32 s16, 64, s8 7337; GFX8-NEXT: s_cmp_lt_u32 s8, 64 7338; GFX8-NEXT: s_cselect_b32 s19, 1, 0 7339; GFX8-NEXT: s_cmp_eq_u32 s8, 0 7340; GFX8-NEXT: s_cselect_b32 s20, 1, 0 7341; GFX8-NEXT: s_lshr_b64 s[4:5], s[14:15], s8 7342; GFX8-NEXT: s_lshr_b64 s[8:9], s[12:13], s8 7343; GFX8-NEXT: s_lshl_b64 s[16:17], s[14:15], s16 7344; GFX8-NEXT: s_or_b64 s[8:9], s[8:9], s[16:17] 7345; GFX8-NEXT: s_lshr_b64 s[14:15], s[14:15], s18 7346; GFX8-NEXT: s_cmp_lg_u32
s19, 0 7347; GFX8-NEXT: s_cselect_b64 s[8:9], s[8:9], s[14:15] 7348; GFX8-NEXT: s_cmp_lg_u32 s20, 0 7349; GFX8-NEXT: s_cselect_b64 s[8:9], s[12:13], s[8:9] 7350; GFX8-NEXT: s_cmp_lg_u32 s19, 0 7351; GFX8-NEXT: s_cselect_b64 s[12:13], s[4:5], 0 7352; GFX8-NEXT: s_or_b64 s[4:5], s[6:7], s[8:9] 7353; GFX8-NEXT: s_or_b64 s[6:7], s[10:11], s[12:13] 7354; GFX8-NEXT: ; return to shader part epilog 7355; 7356; GFX9-LABEL: s_fshr_v2i128: 7357; GFX9: ; %bb.0: 7358; GFX9-NEXT: s_movk_i32 s18, 0x7f 7359; GFX9-NEXT: s_mov_b32 s19, 0 7360; GFX9-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19] 7361; GFX9-NEXT: s_andn2_b64 s[16:17], s[18:19], s[16:17] 7362; GFX9-NEXT: s_lshl_b64 s[24:25], s[0:1], 1 7363; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 7364; GFX9-NEXT: s_lshr_b32 s0, s1, 31 7365; GFX9-NEXT: s_mov_b32 s1, s19 7366; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] 7367; GFX9-NEXT: s_sub_i32 s23, s16, 64 7368; GFX9-NEXT: s_sub_i32 s17, 64, s16 7369; GFX9-NEXT: s_cmp_lt_u32 s16, 64 7370; GFX9-NEXT: s_cselect_b32 s28, 1, 0 7371; GFX9-NEXT: s_cmp_eq_u32 s16, 0 7372; GFX9-NEXT: s_cselect_b32 s29, 1, 0 7373; GFX9-NEXT: s_lshl_b64 s[2:3], s[24:25], s16 7374; GFX9-NEXT: s_lshr_b64 s[26:27], s[24:25], s17 7375; GFX9-NEXT: s_lshl_b64 s[16:17], s[0:1], s16 7376; GFX9-NEXT: s_or_b64 s[16:17], s[26:27], s[16:17] 7377; GFX9-NEXT: s_lshl_b64 s[24:25], s[24:25], s23 7378; GFX9-NEXT: s_cmp_lg_u32 s28, 0 7379; GFX9-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 7380; GFX9-NEXT: s_cselect_b64 s[16:17], s[16:17], s[24:25] 7381; GFX9-NEXT: s_cmp_lg_u32 s29, 0 7382; GFX9-NEXT: s_cselect_b64 s[16:17], s[0:1], s[16:17] 7383; GFX9-NEXT: s_sub_i32 s26, s22, 64 7384; GFX9-NEXT: s_sub_i32 s24, 64, s22 7385; GFX9-NEXT: s_cmp_lt_u32 s22, 64 7386; GFX9-NEXT: s_cselect_b32 s27, 1, 0 7387; GFX9-NEXT: s_cmp_eq_u32 s22, 0 7388; GFX9-NEXT: s_cselect_b32 s28, 1, 0 7389; GFX9-NEXT: s_lshr_b64 s[0:1], s[10:11], s22 7390; GFX9-NEXT: s_lshr_b64 s[22:23], s[8:9], s22 7391; GFX9-NEXT: s_lshl_b64 s[24:25], s[10:11], s24 7392; GFX9-NEXT:
s_or_b64 s[22:23], s[22:23], s[24:25] 7393; GFX9-NEXT: s_lshr_b64 s[10:11], s[10:11], s26 7394; GFX9-NEXT: s_cmp_lg_u32 s27, 0 7395; GFX9-NEXT: s_cselect_b64 s[10:11], s[22:23], s[10:11] 7396; GFX9-NEXT: s_cmp_lg_u32 s28, 0 7397; GFX9-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11] 7398; GFX9-NEXT: s_cmp_lg_u32 s27, 0 7399; GFX9-NEXT: s_cselect_b64 s[10:11], s[0:1], 0 7400; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9] 7401; GFX9-NEXT: s_or_b64 s[2:3], s[16:17], s[10:11] 7402; GFX9-NEXT: s_and_b64 s[8:9], s[20:21], s[18:19] 7403; GFX9-NEXT: s_andn2_b64 s[10:11], s[18:19], s[20:21] 7404; GFX9-NEXT: s_lshl_b64 s[6:7], s[6:7], 1 7405; GFX9-NEXT: s_lshr_b32 s18, s5, 31 7406; GFX9-NEXT: s_lshl_b64 s[16:17], s[4:5], 1 7407; GFX9-NEXT: s_or_b64 s[4:5], s[6:7], s[18:19] 7408; GFX9-NEXT: s_sub_i32 s9, s10, 64 7409; GFX9-NEXT: s_sub_i32 s11, 64, s10 7410; GFX9-NEXT: s_cmp_lt_u32 s10, 64 7411; GFX9-NEXT: s_cselect_b32 s20, 1, 0 7412; GFX9-NEXT: s_cmp_eq_u32 s10, 0 7413; GFX9-NEXT: s_cselect_b32 s21, 1, 0 7414; GFX9-NEXT: s_lshl_b64 s[6:7], s[16:17], s10 7415; GFX9-NEXT: s_lshr_b64 s[18:19], s[16:17], s11 7416; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], s10 7417; GFX9-NEXT: s_or_b64 s[10:11], s[18:19], s[10:11] 7418; GFX9-NEXT: s_lshl_b64 s[16:17], s[16:17], s9 7419; GFX9-NEXT: s_cmp_lg_u32 s20, 0 7420; GFX9-NEXT: s_cselect_b64 s[6:7], s[6:7], 0 7421; GFX9-NEXT: s_cselect_b64 s[10:11], s[10:11], s[16:17] 7422; GFX9-NEXT: s_cmp_lg_u32 s21, 0 7423; GFX9-NEXT: s_cselect_b64 s[10:11], s[4:5], s[10:11] 7424; GFX9-NEXT: s_sub_i32 s18, s8, 64 7425; GFX9-NEXT: s_sub_i32 s16, 64, s8 7426; GFX9-NEXT: s_cmp_lt_u32 s8, 64 7427; GFX9-NEXT: s_cselect_b32 s19, 1, 0 7428; GFX9-NEXT: s_cmp_eq_u32 s8, 0 7429; GFX9-NEXT: s_cselect_b32 s20, 1, 0 7430; GFX9-NEXT: s_lshr_b64 s[4:5], s[14:15], s8 7431; GFX9-NEXT: s_lshr_b64 s[8:9], s[12:13], s8 7432; GFX9-NEXT: s_lshl_b64 s[16:17], s[14:15], s16 7433; GFX9-NEXT: s_or_b64 s[8:9], s[8:9], s[16:17] 7434; GFX9-NEXT: s_lshr_b64 s[14:15], s[14:15], s18 7435;
GFX9-NEXT: s_cmp_lg_u32 s19, 0 7436; GFX9-NEXT: s_cselect_b64 s[8:9], s[8:9], s[14:15] 7437; GFX9-NEXT: s_cmp_lg_u32 s20, 0 7438; GFX9-NEXT: s_cselect_b64 s[8:9], s[12:13], s[8:9] 7439; GFX9-NEXT: s_cmp_lg_u32 s19, 0 7440; GFX9-NEXT: s_cselect_b64 s[12:13], s[4:5], 0 7441; GFX9-NEXT: s_or_b64 s[4:5], s[6:7], s[8:9] 7442; GFX9-NEXT: s_or_b64 s[6:7], s[10:11], s[12:13] 7443; GFX9-NEXT: ; return to shader part epilog 7444; 7445; GFX10-LABEL: s_fshr_v2i128: 7446; GFX10: ; %bb.0: 7447; GFX10-NEXT: s_movk_i32 s18, 0x7f 7448; GFX10-NEXT: s_mov_b32 s19, 0 7449; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 7450; GFX10-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19] 7451; GFX10-NEXT: s_andn2_b64 s[16:17], s[18:19], s[16:17] 7452; GFX10-NEXT: s_lshr_b32 s24, s1, 31 7453; GFX10-NEXT: s_mov_b32 s25, s19 7454; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 7455; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[24:25] 7456; GFX10-NEXT: s_sub_i32 s23, s16, 64 7457; GFX10-NEXT: s_sub_i32 s17, 64, s16 7458; GFX10-NEXT: s_cmp_lt_u32 s16, 64 7459; GFX10-NEXT: s_cselect_b32 s28, 1, 0 7460; GFX10-NEXT: s_cmp_eq_u32 s16, 0 7461; GFX10-NEXT: s_cselect_b32 s29, 1, 0 7462; GFX10-NEXT: s_lshr_b64 s[24:25], s[0:1], s17 7463; GFX10-NEXT: s_lshl_b64 s[26:27], s[2:3], s16 7464; GFX10-NEXT: s_lshl_b64 s[16:17], s[0:1], s16 7465; GFX10-NEXT: s_or_b64 s[24:25], s[24:25], s[26:27] 7466; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s23 7467; GFX10-NEXT: s_cmp_lg_u32 s28, 0 7468; GFX10-NEXT: s_cselect_b64 s[16:17], s[16:17], 0 7469; GFX10-NEXT: s_cselect_b64 s[0:1], s[24:25], s[0:1] 7470; GFX10-NEXT: s_cmp_lg_u32 s29, 0 7471; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] 7472; GFX10-NEXT: s_sub_i32 s26, s22, 64 7473; GFX10-NEXT: s_sub_i32 s23, 64, s22 7474; GFX10-NEXT: s_cmp_lt_u32 s22, 64 7475; GFX10-NEXT: s_cselect_b32 s27, 1, 0 7476; GFX10-NEXT: s_cmp_eq_u32 s22, 0 7477; GFX10-NEXT: s_cselect_b32 s28, 1, 0 7478; GFX10-NEXT: s_lshr_b64 s[0:1], s[8:9], s22 7479; GFX10-NEXT: s_lshl_b64 s[24:25], s[10:11], s23 7480; GFX10-NEXT:
s_lshr_b64 s[22:23], s[10:11], s22 7481; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[24:25] 7482; GFX10-NEXT: s_lshr_b64 s[10:11], s[10:11], s26 7483; GFX10-NEXT: s_cmp_lg_u32 s27, 0 7484; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[10:11] 7485; GFX10-NEXT: s_cmp_lg_u32 s28, 0 7486; GFX10-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1] 7487; GFX10-NEXT: s_cmp_lg_u32 s27, 0 7488; GFX10-NEXT: s_cselect_b64 s[8:9], s[22:23], 0 7489; GFX10-NEXT: s_andn2_b64 s[10:11], s[18:19], s[20:21] 7490; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9] 7491; GFX10-NEXT: s_and_b64 s[8:9], s[20:21], s[18:19] 7492; GFX10-NEXT: s_lshl_b64 s[6:7], s[6:7], 1 7493; GFX10-NEXT: s_lshr_b32 s18, s5, 31 7494; GFX10-NEXT: s_or_b64 s[0:1], s[16:17], s[0:1] 7495; GFX10-NEXT: s_lshl_b64 s[4:5], s[4:5], 1 7496; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[18:19] 7497; GFX10-NEXT: s_sub_i32 s9, s10, 64 7498; GFX10-NEXT: s_sub_i32 s11, 64, s10 7499; GFX10-NEXT: s_cmp_lt_u32 s10, 64 7500; GFX10-NEXT: s_cselect_b32 s20, 1, 0 7501; GFX10-NEXT: s_cmp_eq_u32 s10, 0 7502; GFX10-NEXT: s_cselect_b32 s21, 1, 0 7503; GFX10-NEXT: s_lshr_b64 s[16:17], s[4:5], s11 7504; GFX10-NEXT: s_lshl_b64 s[18:19], s[6:7], s10 7505; GFX10-NEXT: s_lshl_b64 s[10:11], s[4:5], s10 7506; GFX10-NEXT: s_or_b64 s[16:17], s[16:17], s[18:19] 7507; GFX10-NEXT: s_lshl_b64 s[4:5], s[4:5], s9 7508; GFX10-NEXT: s_cmp_lg_u32 s20, 0 7509; GFX10-NEXT: s_cselect_b64 s[10:11], s[10:11], 0 7510; GFX10-NEXT: s_cselect_b64 s[4:5], s[16:17], s[4:5] 7511; GFX10-NEXT: s_cmp_lg_u32 s21, 0 7512; GFX10-NEXT: s_cselect_b64 s[6:7], s[6:7], s[4:5] 7513; GFX10-NEXT: s_sub_i32 s18, s8, 64 7514; GFX10-NEXT: s_sub_i32 s9, 64, s8 7515; GFX10-NEXT: s_cmp_lt_u32 s8, 64 7516; GFX10-NEXT: s_cselect_b32 s19, 1, 0 7517; GFX10-NEXT: s_cmp_eq_u32 s8, 0 7518; GFX10-NEXT: s_cselect_b32 s20, 1, 0 7519; GFX10-NEXT: s_lshr_b64 s[4:5], s[12:13], s8 7520; GFX10-NEXT: s_lshl_b64 s[16:17], s[14:15], s9 7521; GFX10-NEXT: s_lshr_b64 s[8:9], s[14:15], s8 7522; GFX10-NEXT: s_or_b64 s[4:5], s[4:5],
s[16:17] 7523; GFX10-NEXT: s_lshr_b64 s[14:15], s[14:15], s18 7524; GFX10-NEXT: s_cmp_lg_u32 s19, 0 7525; GFX10-NEXT: s_cselect_b64 s[4:5], s[4:5], s[14:15] 7526; GFX10-NEXT: s_cmp_lg_u32 s20, 0 7527; GFX10-NEXT: s_cselect_b64 s[4:5], s[12:13], s[4:5] 7528; GFX10-NEXT: s_cmp_lg_u32 s19, 0 7529; GFX10-NEXT: s_cselect_b64 s[8:9], s[8:9], 0 7530; GFX10-NEXT: s_or_b64 s[4:5], s[10:11], s[4:5] 7531; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] 7532; GFX10-NEXT: ; return to shader part epilog 7533; 7534; GFX11-LABEL: s_fshr_v2i128: 7535; GFX11: ; %bb.0: 7536; GFX11-NEXT: s_movk_i32 s18, 0x7f 7537; GFX11-NEXT: s_mov_b32 s19, 0 7538; GFX11-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 7539; GFX11-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19] 7540; GFX11-NEXT: s_and_not1_b64 s[16:17], s[18:19], s[16:17] 7541; GFX11-NEXT: s_lshr_b32 s24, s1, 31 7542; GFX11-NEXT: s_mov_b32 s25, s19 7543; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 7544; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[24:25] 7545; GFX11-NEXT: s_sub_i32 s23, s16, 64 7546; GFX11-NEXT: s_sub_i32 s17, 64, s16 7547; GFX11-NEXT: s_cmp_lt_u32 s16, 64 7548; GFX11-NEXT: s_cselect_b32 s28, 1, 0 7549; GFX11-NEXT: s_cmp_eq_u32 s16, 0 7550; GFX11-NEXT: s_cselect_b32 s29, 1, 0 7551; GFX11-NEXT: s_lshr_b64 s[24:25], s[0:1], s17 7552; GFX11-NEXT: s_lshl_b64 s[26:27], s[2:3], s16 7553; GFX11-NEXT: s_lshl_b64 s[16:17], s[0:1], s16 7554; GFX11-NEXT: s_or_b64 s[24:25], s[24:25], s[26:27] 7555; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s23 7556; GFX11-NEXT: s_cmp_lg_u32 s28, 0 7557; GFX11-NEXT: s_cselect_b64 s[16:17], s[16:17], 0 7558; GFX11-NEXT: s_cselect_b64 s[0:1], s[24:25], s[0:1] 7559; GFX11-NEXT: s_cmp_lg_u32 s29, 0 7560; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] 7561; GFX11-NEXT: s_sub_i32 s26, s22, 64 7562; GFX11-NEXT: s_sub_i32 s23, 64, s22 7563; GFX11-NEXT: s_cmp_lt_u32 s22, 64 7564; GFX11-NEXT: s_cselect_b32 s27, 1, 0 7565; GFX11-NEXT: s_cmp_eq_u32 s22, 0 7566; GFX11-NEXT: s_cselect_b32 s28, 1, 0 7567; GFX11-NEXT: s_lshr_b64 s[0:1],
s[8:9], s22 7568; GFX11-NEXT: s_lshl_b64 s[24:25], s[10:11], s23 7569; GFX11-NEXT: s_lshr_b64 s[22:23], s[10:11], s22 7570; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[24:25] 7571; GFX11-NEXT: s_lshr_b64 s[10:11], s[10:11], s26 7572; GFX11-NEXT: s_cmp_lg_u32 s27, 0 7573; GFX11-NEXT: s_cselect_b64 s[0:1], s[0:1], s[10:11] 7574; GFX11-NEXT: s_cmp_lg_u32 s28, 0 7575; GFX11-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1] 7576; GFX11-NEXT: s_cmp_lg_u32 s27, 0 7577; GFX11-NEXT: s_cselect_b64 s[8:9], s[22:23], 0 7578; GFX11-NEXT: s_and_not1_b64 s[10:11], s[18:19], s[20:21] 7579; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9] 7580; GFX11-NEXT: s_and_b64 s[8:9], s[20:21], s[18:19] 7581; GFX11-NEXT: s_lshl_b64 s[6:7], s[6:7], 1 7582; GFX11-NEXT: s_lshr_b32 s18, s5, 31 7583; GFX11-NEXT: s_or_b64 s[0:1], s[16:17], s[0:1] 7584; GFX11-NEXT: s_lshl_b64 s[4:5], s[4:5], 1 7585; GFX11-NEXT: s_or_b64 s[6:7], s[6:7], s[18:19] 7586; GFX11-NEXT: s_sub_i32 s9, s10, 64 7587; GFX11-NEXT: s_sub_i32 s11, 64, s10 7588; GFX11-NEXT: s_cmp_lt_u32 s10, 64 7589; GFX11-NEXT: s_cselect_b32 s20, 1, 0 7590; GFX11-NEXT: s_cmp_eq_u32 s10, 0 7591; GFX11-NEXT: s_cselect_b32 s21, 1, 0 7592; GFX11-NEXT: s_lshr_b64 s[16:17], s[4:5], s11 7593; GFX11-NEXT: s_lshl_b64 s[18:19], s[6:7], s10 7594; GFX11-NEXT: s_lshl_b64 s[10:11], s[4:5], s10 7595; GFX11-NEXT: s_or_b64 s[16:17], s[16:17], s[18:19] 7596; GFX11-NEXT: s_lshl_b64 s[4:5], s[4:5], s9 7597; GFX11-NEXT: s_cmp_lg_u32 s20, 0 7598; GFX11-NEXT: s_cselect_b64 s[10:11], s[10:11], 0 7599; GFX11-NEXT: s_cselect_b64 s[4:5], s[16:17], s[4:5] 7600; GFX11-NEXT: s_cmp_lg_u32 s21, 0 7601; GFX11-NEXT: s_cselect_b64 s[6:7], s[6:7], s[4:5] 7602; GFX11-NEXT: s_sub_i32 s18, s8, 64 7603; GFX11-NEXT: s_sub_i32 s9, 64, s8 7604; GFX11-NEXT: s_cmp_lt_u32 s8, 64 7605; GFX11-NEXT: s_cselect_b32 s19, 1, 0 7606; GFX11-NEXT: s_cmp_eq_u32 s8, 0 7607; GFX11-NEXT: s_cselect_b32 s20, 1, 0 7608; GFX11-NEXT: s_lshr_b64 s[4:5], s[12:13], s8 7609; GFX11-NEXT: s_lshl_b64 s[16:17], s[14:15], s9 7610;
GFX11-NEXT: s_lshr_b64 s[8:9], s[14:15], s8 7611; GFX11-NEXT: s_or_b64 s[4:5], s[4:5], s[16:17] 7612; GFX11-NEXT: s_lshr_b64 s[14:15], s[14:15], s18 7613; GFX11-NEXT: s_cmp_lg_u32 s19, 0 7614; GFX11-NEXT: s_cselect_b64 s[4:5], s[4:5], s[14:15] 7615; GFX11-NEXT: s_cmp_lg_u32 s20, 0 7616; GFX11-NEXT: s_cselect_b64 s[4:5], s[12:13], s[4:5] 7617; GFX11-NEXT: s_cmp_lg_u32 s19, 0 7618; GFX11-NEXT: s_cselect_b64 s[8:9], s[8:9], 0 7619; GFX11-NEXT: s_or_b64 s[4:5], s[10:11], s[4:5] 7620; GFX11-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] 7621; GFX11-NEXT: ; return to shader part epilog 7622 %result = call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt) 7623 ret <2 x i128> %result 7624} 7625 7626define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt) { 7627; GFX6-LABEL: v_fshr_v2i128: 7628; GFX6: ; %bb.0: 7629; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7630; GFX6-NEXT: v_xor_b32_e32 v17, -1, v16 7631; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 7632; GFX6-NEXT: v_and_b32_e32 v23, 0x7f, v17 7633; GFX6-NEXT: v_lshrrev_b32_e32 v17, 31, v1 7634; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 1 7635; GFX6-NEXT: v_or_b32_e32 v2, v2, v17 7636; GFX6-NEXT: v_sub_i32_e32 v17, vcc, 64, v23 7637; GFX6-NEXT: v_lshr_b64 v[17:18], v[0:1], v17 7638; GFX6-NEXT: v_lshl_b64 v[21:22], v[2:3], v23 7639; GFX6-NEXT: v_and_b32_e32 v24, 0x7f, v16 7640; GFX6-NEXT: v_sub_i32_e32 v16, vcc, 64, v24 7641; GFX6-NEXT: v_or_b32_e32 v21, v17, v21 7642; GFX6-NEXT: v_or_b32_e32 v22, v18, v22 7643; GFX6-NEXT: v_lshl_b64 v[16:17], v[10:11], v16 7644; GFX6-NEXT: v_lshr_b64 v[18:19], v[8:9], v24 7645; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v23 7646; GFX6-NEXT: v_or_b32_e32 v18, v18, v16 7647; GFX6-NEXT: v_subrev_i32_e32 v16, vcc, 64, v23 7648; GFX6-NEXT: v_or_b32_e32 v19, v19, v17 7649; GFX6-NEXT: v_lshl_b64 v[16:17], v[0:1], v16 7650; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v23 7651; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v23 7652; GFX6-NEXT: v_cndmask_b32_e32 v25, 0,
v0, vcc 7653; GFX6-NEXT: v_cndmask_b32_e32 v0, v16, v21, vcc 7654; GFX6-NEXT: v_cndmask_b32_e32 v16, v17, v22, vcc 7655; GFX6-NEXT: v_cndmask_b32_e64 v17, v0, v2, s[4:5] 7656; GFX6-NEXT: v_cndmask_b32_e64 v16, v16, v3, s[4:5] 7657; GFX6-NEXT: v_subrev_i32_e64 v0, s[4:5], 64, v24 7658; GFX6-NEXT: v_lshr_b64 v[2:3], v[10:11], v0 7659; GFX6-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v24 7660; GFX6-NEXT: v_cndmask_b32_e64 v2, v2, v18, s[4:5] 7661; GFX6-NEXT: v_cndmask_b32_e32 v18, 0, v1, vcc 7662; GFX6-NEXT: v_lshr_b64 v[0:1], v[10:11], v24 7663; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v24 7664; GFX6-NEXT: v_cndmask_b32_e64 v3, v3, v19, s[4:5] 7665; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc 7666; GFX6-NEXT: v_cndmask_b32_e64 v8, 0, v0, s[4:5] 7667; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc 7668; GFX6-NEXT: v_cndmask_b32_e64 v9, 0, v1, s[4:5] 7669; GFX6-NEXT: v_or_b32_e32 v0, v25, v2 7670; GFX6-NEXT: v_or_b32_e32 v2, v17, v8 7671; GFX6-NEXT: v_xor_b32_e32 v8, -1, v20 7672; GFX6-NEXT: v_lshl_b64 v[6:7], v[6:7], 1 7673; GFX6-NEXT: v_or_b32_e32 v1, v18, v3 7674; GFX6-NEXT: v_or_b32_e32 v3, v16, v9 7675; GFX6-NEXT: v_and_b32_e32 v17, 0x7f, v8 7676; GFX6-NEXT: v_lshl_b64 v[8:9], v[4:5], 1 7677; GFX6-NEXT: v_lshrrev_b32_e32 v4, 31, v5 7678; GFX6-NEXT: v_or_b32_e32 v6, v6, v4 7679; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 64, v17 7680; GFX6-NEXT: v_lshr_b64 v[4:5], v[8:9], v4 7681; GFX6-NEXT: v_lshl_b64 v[10:11], v[6:7], v17 7682; GFX6-NEXT: v_subrev_i32_e32 v18, vcc, 64, v17 7683; GFX6-NEXT: v_or_b32_e32 v10, v4, v10 7684; GFX6-NEXT: v_or_b32_e32 v11, v5, v11 7685; GFX6-NEXT: v_lshl_b64 v[4:5], v[8:9], v17 7686; GFX6-NEXT: v_lshl_b64 v[8:9], v[8:9], v18 7687; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v17 7688; GFX6-NEXT: v_and_b32_e32 v16, 0x7f, v20 7689; GFX6-NEXT: v_cndmask_b32_e32 v18, 0, v4, vcc 7690; GFX6-NEXT: v_cndmask_b32_e32 v19, 0, v5, vcc 7691; GFX6-NEXT: v_cndmask_b32_e32 v4, v8, v10, vcc 7692; GFX6-NEXT: v_cndmask_b32_e32 v5, v9, v11, vcc 7693; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, 
v17 7694; GFX6-NEXT: v_cndmask_b32_e32 v8, v4, v6, vcc 7695; GFX6-NEXT: v_cndmask_b32_e32 v9, v5, v7, vcc 7696; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 64, v16 7697; GFX6-NEXT: v_lshr_b64 v[4:5], v[12:13], v16 7698; GFX6-NEXT: v_lshl_b64 v[6:7], v[14:15], v6 7699; GFX6-NEXT: v_subrev_i32_e32 v10, vcc, 64, v16 7700; GFX6-NEXT: v_or_b32_e32 v11, v4, v6 7701; GFX6-NEXT: v_or_b32_e32 v17, v5, v7 7702; GFX6-NEXT: v_lshr_b64 v[6:7], v[14:15], v10 7703; GFX6-NEXT: v_lshr_b64 v[4:5], v[14:15], v16 7704; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v16 7705; GFX6-NEXT: v_cndmask_b32_e32 v6, v6, v11, vcc 7706; GFX6-NEXT: v_cndmask_b32_e32 v7, v7, v17, vcc 7707; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v16 7708; GFX6-NEXT: v_cndmask_b32_e64 v6, v6, v12, s[4:5] 7709; GFX6-NEXT: v_cndmask_b32_e64 v7, v7, v13, s[4:5] 7710; GFX6-NEXT: v_cndmask_b32_e32 v10, 0, v4, vcc 7711; GFX6-NEXT: v_cndmask_b32_e32 v11, 0, v5, vcc 7712; GFX6-NEXT: v_or_b32_e32 v4, v18, v6 7713; GFX6-NEXT: v_or_b32_e32 v5, v19, v7 7714; GFX6-NEXT: v_or_b32_e32 v6, v8, v10 7715; GFX6-NEXT: v_or_b32_e32 v7, v9, v11 7716; GFX6-NEXT: s_setpc_b64 s[30:31] 7717; 7718; GFX8-LABEL: v_fshr_v2i128: 7719; GFX8: ; %bb.0: 7720; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7721; GFX8-NEXT: v_xor_b32_e32 v17, -1, v16 7722; GFX8-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 7723; GFX8-NEXT: v_and_b32_e32 v23, 0x7f, v17 7724; GFX8-NEXT: v_lshrrev_b32_e32 v17, 31, v1 7725; GFX8-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 7726; GFX8-NEXT: v_or_b32_e32 v2, v2, v17 7727; GFX8-NEXT: v_sub_u32_e32 v17, vcc, 64, v23 7728; GFX8-NEXT: v_lshrrev_b64 v[17:18], v17, v[0:1] 7729; GFX8-NEXT: v_lshlrev_b64 v[21:22], v23, v[2:3] 7730; GFX8-NEXT: v_and_b32_e32 v24, 0x7f, v16 7731; GFX8-NEXT: v_sub_u32_e32 v16, vcc, 64, v24 7732; GFX8-NEXT: v_or_b32_e32 v21, v17, v21 7733; GFX8-NEXT: v_or_b32_e32 v22, v18, v22 7734; GFX8-NEXT: v_lshlrev_b64 v[16:17], v16, v[10:11] 7735; GFX8-NEXT: v_lshrrev_b64 v[18:19], v24, v[8:9] 7736; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v23 
7737; GFX8-NEXT: v_or_b32_e32 v18, v18, v16 7738; GFX8-NEXT: v_subrev_u32_e32 v16, vcc, 64, v23 7739; GFX8-NEXT: v_or_b32_e32 v19, v19, v17 7740; GFX8-NEXT: v_lshlrev_b64 v[16:17], v16, v[0:1] 7741; GFX8-NEXT: v_lshlrev_b64 v[0:1], v23, v[0:1] 7742; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v23 7743; GFX8-NEXT: v_cndmask_b32_e32 v25, 0, v0, vcc 7744; GFX8-NEXT: v_cndmask_b32_e32 v0, v16, v21, vcc 7745; GFX8-NEXT: v_cndmask_b32_e32 v16, v17, v22, vcc 7746; GFX8-NEXT: v_cndmask_b32_e64 v17, v0, v2, s[4:5] 7747; GFX8-NEXT: v_cndmask_b32_e64 v16, v16, v3, s[4:5] 7748; GFX8-NEXT: v_subrev_u32_e64 v0, s[4:5], 64, v24 7749; GFX8-NEXT: v_lshrrev_b64 v[2:3], v0, v[10:11] 7750; GFX8-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v24 7751; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v18, s[4:5] 7752; GFX8-NEXT: v_cndmask_b32_e32 v18, 0, v1, vcc 7753; GFX8-NEXT: v_lshrrev_b64 v[0:1], v24, v[10:11] 7754; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v24 7755; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v19, s[4:5] 7756; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc 7757; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, v0, s[4:5] 7758; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc 7759; GFX8-NEXT: v_cndmask_b32_e64 v9, 0, v1, s[4:5] 7760; GFX8-NEXT: v_or_b32_e32 v0, v25, v2 7761; GFX8-NEXT: v_or_b32_e32 v2, v17, v8 7762; GFX8-NEXT: v_xor_b32_e32 v8, -1, v20 7763; GFX8-NEXT: v_lshlrev_b64 v[6:7], 1, v[6:7] 7764; GFX8-NEXT: v_or_b32_e32 v1, v18, v3 7765; GFX8-NEXT: v_or_b32_e32 v3, v16, v9 7766; GFX8-NEXT: v_and_b32_e32 v17, 0x7f, v8 7767; GFX8-NEXT: v_lshlrev_b64 v[8:9], 1, v[4:5] 7768; GFX8-NEXT: v_lshrrev_b32_e32 v4, 31, v5 7769; GFX8-NEXT: v_or_b32_e32 v6, v6, v4 7770; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 64, v17 7771; GFX8-NEXT: v_lshrrev_b64 v[4:5], v4, v[8:9] 7772; GFX8-NEXT: v_lshlrev_b64 v[10:11], v17, v[6:7] 7773; GFX8-NEXT: v_subrev_u32_e32 v18, vcc, 64, v17 7774; GFX8-NEXT: v_or_b32_e32 v10, v4, v10 7775; GFX8-NEXT: v_or_b32_e32 v11, v5, v11 7776; GFX8-NEXT: v_lshlrev_b64 v[4:5], v17, v[8:9] 7777; GFX8-NEXT: v_lshlrev_b64 
v[8:9], v18, v[8:9] 7778; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v17 7779; GFX8-NEXT: v_and_b32_e32 v16, 0x7f, v20 7780; GFX8-NEXT: v_cndmask_b32_e32 v18, 0, v4, vcc 7781; GFX8-NEXT: v_cndmask_b32_e32 v19, 0, v5, vcc 7782; GFX8-NEXT: v_cndmask_b32_e32 v4, v8, v10, vcc 7783; GFX8-NEXT: v_cndmask_b32_e32 v5, v9, v11, vcc 7784; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v17 7785; GFX8-NEXT: v_cndmask_b32_e32 v8, v4, v6, vcc 7786; GFX8-NEXT: v_cndmask_b32_e32 v9, v5, v7, vcc 7787; GFX8-NEXT: v_sub_u32_e32 v6, vcc, 64, v16 7788; GFX8-NEXT: v_lshrrev_b64 v[4:5], v16, v[12:13] 7789; GFX8-NEXT: v_lshlrev_b64 v[6:7], v6, v[14:15] 7790; GFX8-NEXT: v_subrev_u32_e32 v10, vcc, 64, v16 7791; GFX8-NEXT: v_or_b32_e32 v11, v4, v6 7792; GFX8-NEXT: v_or_b32_e32 v17, v5, v7 7793; GFX8-NEXT: v_lshrrev_b64 v[6:7], v10, v[14:15] 7794; GFX8-NEXT: v_lshrrev_b64 v[4:5], v16, v[14:15] 7795; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v16 7796; GFX8-NEXT: v_cndmask_b32_e32 v6, v6, v11, vcc 7797; GFX8-NEXT: v_cndmask_b32_e32 v7, v7, v17, vcc 7798; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v16 7799; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v12, s[4:5] 7800; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v13, s[4:5] 7801; GFX8-NEXT: v_cndmask_b32_e32 v10, 0, v4, vcc 7802; GFX8-NEXT: v_cndmask_b32_e32 v11, 0, v5, vcc 7803; GFX8-NEXT: v_or_b32_e32 v4, v18, v6 7804; GFX8-NEXT: v_or_b32_e32 v5, v19, v7 7805; GFX8-NEXT: v_or_b32_e32 v6, v8, v10 7806; GFX8-NEXT: v_or_b32_e32 v7, v9, v11 7807; GFX8-NEXT: s_setpc_b64 s[30:31] 7808; 7809; GFX9-LABEL: v_fshr_v2i128: 7810; GFX9: ; %bb.0: 7811; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7812; GFX9-NEXT: v_xor_b32_e32 v17, -1, v16 7813; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 7814; GFX9-NEXT: v_and_b32_e32 v23, 0x7f, v17 7815; GFX9-NEXT: v_lshrrev_b32_e32 v17, 31, v1 7816; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 7817; GFX9-NEXT: v_or_b32_e32 v2, v2, v17 7818; GFX9-NEXT: v_sub_u32_e32 v17, 64, v23 7819; GFX9-NEXT: v_lshrrev_b64 v[17:18], v17, v[0:1] 7820; GFX9-NEXT: 
v_lshlrev_b64 v[21:22], v23, v[2:3] 7821; GFX9-NEXT: v_and_b32_e32 v24, 0x7f, v16 7822; GFX9-NEXT: v_sub_u32_e32 v16, 64, v24 7823; GFX9-NEXT: v_or_b32_e32 v21, v17, v21 7824; GFX9-NEXT: v_or_b32_e32 v22, v18, v22 7825; GFX9-NEXT: v_lshlrev_b64 v[16:17], v16, v[10:11] 7826; GFX9-NEXT: v_lshrrev_b64 v[18:19], v24, v[8:9] 7827; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v23 7828; GFX9-NEXT: v_or_b32_e32 v18, v18, v16 7829; GFX9-NEXT: v_subrev_u32_e32 v16, 64, v23 7830; GFX9-NEXT: v_or_b32_e32 v19, v19, v17 7831; GFX9-NEXT: v_lshlrev_b64 v[16:17], v16, v[0:1] 7832; GFX9-NEXT: v_lshlrev_b64 v[0:1], v23, v[0:1] 7833; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v23 7834; GFX9-NEXT: v_cndmask_b32_e32 v25, 0, v0, vcc 7835; GFX9-NEXT: v_cndmask_b32_e32 v0, v16, v21, vcc 7836; GFX9-NEXT: v_cndmask_b32_e32 v16, v17, v22, vcc 7837; GFX9-NEXT: v_cndmask_b32_e64 v17, v0, v2, s[4:5] 7838; GFX9-NEXT: v_subrev_u32_e32 v0, 64, v24 7839; GFX9-NEXT: v_cndmask_b32_e64 v16, v16, v3, s[4:5] 7840; GFX9-NEXT: v_lshrrev_b64 v[2:3], v0, v[10:11] 7841; GFX9-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v24 7842; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v18, s[4:5] 7843; GFX9-NEXT: v_cndmask_b32_e32 v18, 0, v1, vcc 7844; GFX9-NEXT: v_lshrrev_b64 v[0:1], v24, v[10:11] 7845; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v24 7846; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v19, s[4:5] 7847; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc 7848; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, v0, s[4:5] 7849; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc 7850; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, v1, s[4:5] 7851; GFX9-NEXT: v_or_b32_e32 v0, v25, v2 7852; GFX9-NEXT: v_or_b32_e32 v2, v17, v8 7853; GFX9-NEXT: v_xor_b32_e32 v8, -1, v20 7854; GFX9-NEXT: v_lshlrev_b64 v[6:7], 1, v[6:7] 7855; GFX9-NEXT: v_or_b32_e32 v1, v18, v3 7856; GFX9-NEXT: v_or_b32_e32 v3, v16, v9 7857; GFX9-NEXT: v_and_b32_e32 v17, 0x7f, v8 7858; GFX9-NEXT: v_lshlrev_b64 v[8:9], 1, v[4:5] 7859; GFX9-NEXT: v_lshrrev_b32_e32 v4, 31, v5 7860; GFX9-NEXT: v_or_b32_e32 v6, v6, v4 7861; GFX9-NEXT: 
v_sub_u32_e32 v4, 64, v17 7862; GFX9-NEXT: v_lshrrev_b64 v[4:5], v4, v[8:9] 7863; GFX9-NEXT: v_lshlrev_b64 v[10:11], v17, v[6:7] 7864; GFX9-NEXT: v_subrev_u32_e32 v18, 64, v17 7865; GFX9-NEXT: v_or_b32_e32 v10, v4, v10 7866; GFX9-NEXT: v_or_b32_e32 v11, v5, v11 7867; GFX9-NEXT: v_lshlrev_b64 v[4:5], v17, v[8:9] 7868; GFX9-NEXT: v_lshlrev_b64 v[8:9], v18, v[8:9] 7869; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v17 7870; GFX9-NEXT: v_and_b32_e32 v16, 0x7f, v20 7871; GFX9-NEXT: v_cndmask_b32_e32 v18, 0, v4, vcc 7872; GFX9-NEXT: v_cndmask_b32_e32 v19, 0, v5, vcc 7873; GFX9-NEXT: v_cndmask_b32_e32 v4, v8, v10, vcc 7874; GFX9-NEXT: v_cndmask_b32_e32 v5, v9, v11, vcc 7875; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v17 7876; GFX9-NEXT: v_cndmask_b32_e32 v8, v4, v6, vcc 7877; GFX9-NEXT: v_sub_u32_e32 v6, 64, v16 7878; GFX9-NEXT: v_cndmask_b32_e32 v9, v5, v7, vcc 7879; GFX9-NEXT: v_lshrrev_b64 v[4:5], v16, v[12:13] 7880; GFX9-NEXT: v_lshlrev_b64 v[6:7], v6, v[14:15] 7881; GFX9-NEXT: v_subrev_u32_e32 v10, 64, v16 7882; GFX9-NEXT: v_or_b32_e32 v11, v4, v6 7883; GFX9-NEXT: v_or_b32_e32 v17, v5, v7 7884; GFX9-NEXT: v_lshrrev_b64 v[6:7], v10, v[14:15] 7885; GFX9-NEXT: v_lshrrev_b64 v[4:5], v16, v[14:15] 7886; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v16 7887; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v11, vcc 7888; GFX9-NEXT: v_cndmask_b32_e32 v7, v7, v17, vcc 7889; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v16 7890; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v12, s[4:5] 7891; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v13, s[4:5] 7892; GFX9-NEXT: v_cndmask_b32_e32 v10, 0, v4, vcc 7893; GFX9-NEXT: v_cndmask_b32_e32 v11, 0, v5, vcc 7894; GFX9-NEXT: v_or_b32_e32 v4, v18, v6 7895; GFX9-NEXT: v_or_b32_e32 v5, v19, v7 7896; GFX9-NEXT: v_or_b32_e32 v6, v8, v10 7897; GFX9-NEXT: v_or_b32_e32 v7, v9, v11 7898; GFX9-NEXT: s_setpc_b64 s[30:31] 7899; 7900; GFX10-LABEL: v_fshr_v2i128: 7901; GFX10: ; %bb.0: 7902; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7903; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 7904; GFX10-NEXT: 
v_xor_b32_e32 v17, -1, v16 7905; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 7906; GFX10-NEXT: v_and_b32_e32 v26, 0x7f, v16 7907; GFX10-NEXT: v_lshlrev_b64 v[6:7], 1, v[6:7] 7908; GFX10-NEXT: v_and_b32_e32 v25, 0x7f, v17 7909; GFX10-NEXT: v_lshrrev_b32_e32 v17, 31, v1 7910; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 7911; GFX10-NEXT: v_subrev_nc_u32_e32 v27, 64, v26 7912; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v26 7913; GFX10-NEXT: v_sub_nc_u32_e32 v18, 64, v25 7914; GFX10-NEXT: v_or_b32_e32 v2, v2, v17 7915; GFX10-NEXT: v_subrev_nc_u32_e32 v19, 64, v25 7916; GFX10-NEXT: v_lshlrev_b64 v[23:24], v25, v[0:1] 7917; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v25 7918; GFX10-NEXT: v_lshrrev_b64 v[17:18], v18, v[0:1] 7919; GFX10-NEXT: v_lshlrev_b64 v[21:22], v25, v[2:3] 7920; GFX10-NEXT: v_lshlrev_b64 v[0:1], v19, v[0:1] 7921; GFX10-NEXT: v_cndmask_b32_e32 v23, 0, v23, vcc_lo 7922; GFX10-NEXT: v_cndmask_b32_e32 v24, 0, v24, vcc_lo 7923; GFX10-NEXT: v_or_b32_e32 v22, v18, v22 7924; GFX10-NEXT: v_sub_nc_u32_e32 v18, 64, v26 7925; GFX10-NEXT: v_or_b32_e32 v21, v17, v21 7926; GFX10-NEXT: v_lshrrev_b64 v[16:17], v26, v[8:9] 7927; GFX10-NEXT: v_cndmask_b32_e32 v22, v1, v22, vcc_lo 7928; GFX10-NEXT: v_lshlrev_b64 v[18:19], v18, v[10:11] 7929; GFX10-NEXT: v_cndmask_b32_e32 v21, v0, v21, vcc_lo 7930; GFX10-NEXT: v_lshrrev_b64 v[0:1], v27, v[10:11] 7931; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v25 7932; GFX10-NEXT: v_or_b32_e32 v16, v16, v18 7933; GFX10-NEXT: v_or_b32_e32 v17, v17, v19 7934; GFX10-NEXT: v_cndmask_b32_e32 v18, v21, v2, vcc_lo 7935; GFX10-NEXT: v_cndmask_b32_e32 v22, v22, v3, vcc_lo 7936; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v26 7937; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v16, s4 7938; GFX10-NEXT: v_xor_b32_e32 v16, -1, v20 7939; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v17, s4 7940; GFX10-NEXT: v_lshrrev_b64 v[2:3], v26, v[10:11] 7941; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo 7942; GFX10-NEXT: v_and_b32_e32 v25, 0x7f, v16 7943; GFX10-NEXT: v_lshrrev_b32_e32 
v8, 31, v5 7944; GFX10-NEXT: v_lshlrev_b64 v[4:5], 1, v[4:5] 7945; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo 7946; GFX10-NEXT: v_or_b32_e32 v0, v23, v0 7947; GFX10-NEXT: v_sub_nc_u32_e32 v9, 64, v25 7948; GFX10-NEXT: v_or_b32_e32 v6, v6, v8 7949; GFX10-NEXT: v_and_b32_e32 v23, 0x7f, v20 7950; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v2, s4 7951; GFX10-NEXT: v_cndmask_b32_e64 v26, 0, v3, s4 7952; GFX10-NEXT: v_lshrrev_b64 v[8:9], v9, v[4:5] 7953; GFX10-NEXT: v_lshlrev_b64 v[10:11], v25, v[6:7] 7954; GFX10-NEXT: v_sub_nc_u32_e32 v20, 64, v23 7955; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 64, v25 7956; GFX10-NEXT: v_or_b32_e32 v2, v18, v2 7957; GFX10-NEXT: v_lshlrev_b64 v[16:17], v25, v[4:5] 7958; GFX10-NEXT: v_lshrrev_b64 v[18:19], v23, v[12:13] 7959; GFX10-NEXT: v_or_b32_e32 v10, v8, v10 7960; GFX10-NEXT: v_subrev_nc_u32_e32 v8, 64, v23 7961; GFX10-NEXT: v_lshlrev_b64 v[20:21], v20, v[14:15] 7962; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v25 7963; GFX10-NEXT: v_lshlrev_b64 v[3:4], v3, v[4:5] 7964; GFX10-NEXT: v_or_b32_e32 v5, v9, v11 7965; GFX10-NEXT: v_lshrrev_b64 v[8:9], v8, v[14:15] 7966; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v23 7967; GFX10-NEXT: v_cndmask_b32_e32 v11, 0, v16, vcc_lo 7968; GFX10-NEXT: v_or_b32_e32 v16, v18, v20 7969; GFX10-NEXT: v_or_b32_e32 v18, v19, v21 7970; GFX10-NEXT: v_cndmask_b32_e32 v10, v3, v10, vcc_lo 7971; GFX10-NEXT: v_cndmask_b32_e32 v5, v4, v5, vcc_lo 7972; GFX10-NEXT: v_lshrrev_b64 v[3:4], v23, v[14:15] 7973; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v16, s4 7974; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 0, v23 7975; GFX10-NEXT: v_cmp_eq_u32_e64 s6, 0, v25 7976; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v18, s4 7977; GFX10-NEXT: v_cndmask_b32_e32 v14, 0, v17, vcc_lo 7978; GFX10-NEXT: v_or_b32_e32 v1, v24, v1 7979; GFX10-NEXT: v_cndmask_b32_e64 v6, v10, v6, s6 7980; GFX10-NEXT: v_cndmask_b32_e64 v7, v5, v7, s6 7981; GFX10-NEXT: v_cndmask_b32_e64 v5, v8, v12, s5 7982; GFX10-NEXT: v_cndmask_b32_e64 v8, v9, v13, s5 7983; GFX10-NEXT: v_cndmask_b32_e64 
v9, 0, v3, s4 7984; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, v4, s4 7985; GFX10-NEXT: v_or_b32_e32 v3, v22, v26 7986; GFX10-NEXT: v_or_b32_e32 v4, v11, v5 7987; GFX10-NEXT: v_or_b32_e32 v5, v14, v8 7988; GFX10-NEXT: v_or_b32_e32 v6, v6, v9 7989; GFX10-NEXT: v_or_b32_e32 v7, v7, v10 7990; GFX10-NEXT: s_setpc_b64 s[30:31] 7991; 7992; GFX11-LABEL: v_fshr_v2i128: 7993; GFX11: ; %bb.0: 7994; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7995; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 7996; GFX11-NEXT: v_xor_b32_e32 v17, -1, v16 7997; GFX11-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 7998; GFX11-NEXT: v_lshlrev_b64 v[6:7], 1, v[6:7] 7999; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3) 8000; GFX11-NEXT: v_and_b32_e32 v25, 0x7f, v17 8001; GFX11-NEXT: v_lshrrev_b32_e32 v17, 31, v1 8002; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 8003; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v25 8004; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 8005; GFX11-NEXT: v_or_b32_e32 v2, v2, v17 8006; GFX11-NEXT: v_lshlrev_b64 v[23:24], v25, v[0:1] 8007; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 8008; GFX11-NEXT: v_dual_cndmask_b32 v23, 0, v23 :: v_dual_and_b32 v26, 0x7f, v16 8009; GFX11-NEXT: v_cndmask_b32_e32 v24, 0, v24, vcc_lo 8010; GFX11-NEXT: v_sub_nc_u32_e32 v18, 64, v25 8011; GFX11-NEXT: v_lshlrev_b64 v[21:22], v25, v[2:3] 8012; GFX11-NEXT: v_subrev_nc_u32_e32 v19, 64, v25 8013; GFX11-NEXT: v_subrev_nc_u32_e32 v27, 64, v26 8014; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v26 8015; GFX11-NEXT: v_lshrrev_b64 v[17:18], v18, v[0:1] 8016; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) 8017; GFX11-NEXT: v_lshlrev_b64 v[0:1], v19, v[0:1] 8018; GFX11-NEXT: v_or_b32_e32 v22, v18, v22 8019; GFX11-NEXT: v_sub_nc_u32_e32 v18, 64, v26 8020; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) 8021; GFX11-NEXT: v_or_b32_e32 v21, v17, 
v21 8022; GFX11-NEXT: v_lshrrev_b64 v[16:17], v26, v[8:9] 8023; GFX11-NEXT: v_cndmask_b32_e32 v22, v1, v22, vcc_lo 8024; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 8025; GFX11-NEXT: v_lshlrev_b64 v[18:19], v18, v[10:11] 8026; GFX11-NEXT: v_cndmask_b32_e32 v21, v0, v21, vcc_lo 8027; GFX11-NEXT: v_lshrrev_b64 v[0:1], v27, v[10:11] 8028; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v25 8029; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2) 8030; GFX11-NEXT: v_or_b32_e32 v16, v16, v18 8031; GFX11-NEXT: v_or_b32_e32 v17, v17, v19 8032; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v16, s0 8033; GFX11-NEXT: v_xor_b32_e32 v16, -1, v20 8034; GFX11-NEXT: v_cndmask_b32_e32 v18, v21, v2, vcc_lo 8035; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) 8036; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v17, s0 8037; GFX11-NEXT: v_cndmask_b32_e32 v22, v22, v3, vcc_lo 8038; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v26 8039; GFX11-NEXT: v_and_b32_e32 v25, 0x7f, v16 8040; GFX11-NEXT: v_lshrrev_b64 v[2:3], v26, v[10:11] 8041; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v9 :: v_dual_cndmask_b32 v0, v0, v8 8042; GFX11-NEXT: v_lshrrev_b32_e32 v8, 31, v5 8043; GFX11-NEXT: v_lshlrev_b64 v[4:5], 1, v[4:5] 8044; GFX11-NEXT: v_sub_nc_u32_e32 v9, 64, v25 8045; GFX11-NEXT: v_cndmask_b32_e64 v26, 0, v3, s0 8046; GFX11-NEXT: v_subrev_nc_u32_e32 v3, 64, v25 8047; GFX11-NEXT: v_or_b32_e32 v6, v6, v8 8048; GFX11-NEXT: v_or_b32_e32 v0, v23, v0 8049; GFX11-NEXT: v_lshrrev_b64 v[8:9], v9, v[4:5] 8050; GFX11-NEXT: v_lshlrev_b64 v[16:17], v25, v[4:5] 8051; GFX11-NEXT: v_lshlrev_b64 v[3:4], v3, v[4:5] 8052; GFX11-NEXT: v_lshlrev_b64 v[10:11], v25, v[6:7] 8053; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v25 8054; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v2, s0 8055; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v25 8056; GFX11-NEXT: v_or_b32_e32 v1, v24, v1 8057; GFX11-NEXT: v_or_b32_e32 v10, v8, v10 8058; GFX11-NEXT: v_and_b32_e32 v23, 0x7f, v20 8059; GFX11-NEXT: v_or_b32_e32 
v2, v18, v2 8060; GFX11-NEXT: v_or_b32_e32 v5, v9, v11 8061; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 8062; GFX11-NEXT: v_dual_cndmask_b32 v11, 0, v16 :: v_dual_cndmask_b32 v10, v3, v10 8063; GFX11-NEXT: v_sub_nc_u32_e32 v20, 64, v23 8064; GFX11-NEXT: v_subrev_nc_u32_e32 v8, 64, v23 8065; GFX11-NEXT: v_lshrrev_b64 v[18:19], v23, v[12:13] 8066; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v23 8067; GFX11-NEXT: v_cndmask_b32_e32 v5, v4, v5, vcc_lo 8068; GFX11-NEXT: v_lshlrev_b64 v[20:21], v20, v[14:15] 8069; GFX11-NEXT: v_lshrrev_b64 v[8:9], v8, v[14:15] 8070; GFX11-NEXT: v_lshrrev_b64 v[3:4], v23, v[14:15] 8071; GFX11-NEXT: v_cndmask_b32_e32 v14, 0, v17, vcc_lo 8072; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v23 8073; GFX11-NEXT: v_cndmask_b32_e64 v6, v10, v6, s2 8074; GFX11-NEXT: v_or_b32_e32 v16, v18, v20 8075; GFX11-NEXT: v_or_b32_e32 v18, v19, v21 8076; GFX11-NEXT: v_cndmask_b32_e64 v7, v5, v7, s2 8077; GFX11-NEXT: v_cndmask_b32_e64 v10, 0, v4, s0 8078; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 8079; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, v16, s0 8080; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, v18, s0 8081; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 8082; GFX11-NEXT: v_or_b32_e32 v7, v7, v10 8083; GFX11-NEXT: v_cndmask_b32_e64 v5, v8, v12, s1 8084; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) 8085; GFX11-NEXT: v_cndmask_b32_e64 v8, v9, v13, s1 8086; GFX11-NEXT: v_cndmask_b32_e64 v9, 0, v3, s0 8087; GFX11-NEXT: v_or_b32_e32 v3, v22, v26 8088; GFX11-NEXT: v_or_b32_e32 v4, v11, v5 8089; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 8090; GFX11-NEXT: v_or_b32_e32 v5, v14, v8 8091; GFX11-NEXT: v_or_b32_e32 v6, v6, v9 8092; GFX11-NEXT: s_setpc_b64 s[30:31] 8093 %result = call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt) 8094 ret <2 x i128> %result 8095} 
; Declarations of the llvm.fshr (funnel shift right) intrinsic overloads, covering
; scalar widths from i7 to i128 and vector forms from <2 x i8> to <2 x i128>.
; Each overload takes three operands of its own type and returns that same type.
; Every declaration references attribute group #0, which is defined after the
; declarations as { nounwind readnone speculatable willreturn }.
8096 8097declare i7 @llvm.fshr.i7(i7, i7, i7) #0 8098declare i8 @llvm.fshr.i8(i8, i8, i8) #0 8099declare <2 x i8> @llvm.fshr.v2i8(<2 x i8>, <2 x i8>, <2 x i8>) #0 8100declare <4 x i8> @llvm.fshr.v4i8(<4 x i8>, <4 x i8>, <4 x i8>) #0 8101 8102declare i16 @llvm.fshr.i16(i16, i16, i16) #0 8103declare <2 x i16> @llvm.fshr.v2i16(<2 x i16>, <2 x i16>, <2 x i16>) #0 8104declare <3 x i16> @llvm.fshr.v3i16(<3 x i16>, <3 x i16>, <3 x i16>) #0 8105declare <4 x i16> @llvm.fshr.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) #0 8106declare <5 x i16> @llvm.fshr.v5i16(<5 x i16>, <5 x i16>, <5 x i16>) #0 8107declare <6 x i16> @llvm.fshr.v6i16(<6 x i16>, <6 x i16>, <6 x i16>) #0 8108declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) #0 8109 8110declare i24 @llvm.fshr.i24(i24, i24, i24) #0 8111declare <2 x i24> @llvm.fshr.v2i24(<2 x i24>, <2 x i24>, <2 x i24>) #0 8112 8113declare i32 @llvm.fshr.i32(i32, i32, i32) #0 8114declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) #0 8115declare <3 x i32> @llvm.fshr.v3i32(<3 x i32>, <3 x i32>, <3 x i32>) #0 8116declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #0 8117declare <5 x i32> @llvm.fshr.v5i32(<5 x i32>, <5 x i32>, <5 x i32>) #0 8118declare <16 x i32> @llvm.fshr.v16i32(<16 x i32>, <16 x i32>, <16 x i32>) #0 8119 8120declare i48 @llvm.fshr.i48(i48, i48, i48) #0 8121 8122declare i64 @llvm.fshr.i64(i64, i64, i64) #0 8123declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) #0 8124 8125declare i128 @llvm.fshr.i128(i128, i128, i128) #0 8126declare <2 x i128> @llvm.fshr.v2i128(<2 x i128>, <2 x i128>, <2 x i128>) #0 8127 8128attributes #0 = { nounwind readnone speculatable willreturn } 8129