1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -o - %s | FileCheck -check-prefixes=GCN,GFX6 %s 3; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji -o - %s | FileCheck -check-prefixes=GCN,GFX8 %s 4; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s 5; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -o - %s | FileCheck -check-prefixes=GCN,GFX10 %s 6 7define amdgpu_ps i7 @s_fshr_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) { 8; GFX6-LABEL: s_fshr_i7: 9; GFX6: ; %bb.0: 10; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 7 11; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 12; GFX6-NEXT: s_movk_i32 s3, 0x7f 13; GFX6-NEXT: s_and_b32 s2, s2, s3 14; GFX6-NEXT: s_lshl_b32 s0, s0, 1 15; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 16; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 17; GFX6-NEXT: s_and_b32 s1, s1, s3 18; GFX6-NEXT: v_mul_lo_u32 v1, -7, v0 19; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 20; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 21; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0 22; GFX6-NEXT: v_mul_lo_u32 v0, v0, 7 23; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 24; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 7, v0 25; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 26; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 27; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 7, v0 28; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 29; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 30; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 6, v0 31; GFX6-NEXT: v_and_b32_e32 v0, s3, v0 32; GFX6-NEXT: v_and_b32_e32 v1, s3, v1 33; GFX6-NEXT: v_lshl_b32_e32 v1, s0, v1 34; GFX6-NEXT: v_lshr_b32_e32 v0, s1, v0 35; GFX6-NEXT: v_or_b32_e32 v0, v1, v0 36; GFX6-NEXT: v_readfirstlane_b32 s0, v0 37; GFX6-NEXT: ; return to shader part epilog 38; 39; GFX8-LABEL: s_fshr_i7: 40; GFX8: ; %bb.0: 41; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 7 42; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 43; GFX8-NEXT: s_movk_i32 s3, 0x7f 44; 
GFX8-NEXT: s_and_b32 s2, s2, s3 45; GFX8-NEXT: s_lshl_b32 s0, s0, 1 46; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 47; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 48; GFX8-NEXT: s_and_b32 s1, s1, s3 49; GFX8-NEXT: v_mul_lo_u32 v1, -7, v0 50; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1 51; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1 52; GFX8-NEXT: v_mul_hi_u32 v0, s2, v0 53; GFX8-NEXT: v_mul_lo_u32 v0, v0, 7 54; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s2, v0 55; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 7, v0 56; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 57; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 58; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 7, v0 59; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 60; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 61; GFX8-NEXT: v_sub_u16_e32 v1, 6, v0 62; GFX8-NEXT: v_and_b32_e32 v0, s3, v0 63; GFX8-NEXT: v_and_b32_e32 v1, s3, v1 64; GFX8-NEXT: v_lshlrev_b16_e64 v1, v1, s0 65; GFX8-NEXT: v_lshrrev_b16_e64 v0, v0, s1 66; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 67; GFX8-NEXT: v_readfirstlane_b32 s0, v0 68; GFX8-NEXT: ; return to shader part epilog 69; 70; GFX9-LABEL: s_fshr_i7: 71; GFX9: ; %bb.0: 72; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 7 73; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 74; GFX9-NEXT: s_movk_i32 s3, 0x7f 75; GFX9-NEXT: s_and_b32 s2, s2, s3 76; GFX9-NEXT: s_lshl_b32 s0, s0, 1 77; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 78; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 79; GFX9-NEXT: s_and_b32 s1, s1, s3 80; GFX9-NEXT: v_mul_lo_u32 v1, -7, v0 81; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 82; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 83; GFX9-NEXT: v_mul_hi_u32 v0, s2, v0 84; GFX9-NEXT: v_mul_lo_u32 v0, v0, 7 85; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0 86; GFX9-NEXT: v_subrev_u32_e32 v1, 7, v0 87; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 88; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 89; GFX9-NEXT: v_subrev_u32_e32 v1, 7, v0 90; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 91; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 92; GFX9-NEXT: v_sub_u16_e32 v1, 6, v0 93; GFX9-NEXT: v_and_b32_e32 v0, s3, v0 94; 
GFX9-NEXT: v_and_b32_e32 v1, s3, v1 95; GFX9-NEXT: v_lshlrev_b16_e64 v1, v1, s0 96; GFX9-NEXT: v_lshrrev_b16_e64 v0, v0, s1 97; GFX9-NEXT: v_or_b32_e32 v0, v1, v0 98; GFX9-NEXT: v_readfirstlane_b32 s0, v0 99; GFX9-NEXT: ; return to shader part epilog 100; 101; GFX10-LABEL: s_fshr_i7: 102; GFX10: ; %bb.0: 103; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 7 104; GFX10-NEXT: s_movk_i32 s3, 0x7f 105; GFX10-NEXT: s_lshl_b32 s0, s0, 1 106; GFX10-NEXT: s_and_b32 s2, s2, s3 107; GFX10-NEXT: s_and_b32 s1, s1, s3 108; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 109; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 110; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 111; GFX10-NEXT: v_mul_lo_u32 v1, -7, v0 112; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1 113; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1 114; GFX10-NEXT: v_mul_hi_u32 v0, s2, v0 115; GFX10-NEXT: v_mul_lo_u32 v0, v0, 7 116; GFX10-NEXT: v_sub_nc_u32_e32 v0, s2, v0 117; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 7, v0 118; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0 119; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 120; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 7, v0 121; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0 122; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 123; GFX10-NEXT: v_sub_nc_u16 v1, 6, v0 124; GFX10-NEXT: v_and_b32_e32 v0, s3, v0 125; GFX10-NEXT: v_and_b32_e32 v1, s3, v1 126; GFX10-NEXT: v_lshrrev_b16 v0, v0, s1 127; GFX10-NEXT: v_lshlrev_b16 v1, v1, s0 128; GFX10-NEXT: v_or_b32_e32 v0, v1, v0 129; GFX10-NEXT: v_readfirstlane_b32 s0, v0 130; GFX10-NEXT: ; return to shader part epilog 131 %result = call i7 @llvm.fshr.i7(i7 %lhs, i7 %rhs, i7 %amt) 132 ret i7 %result 133} 134 135define i7 @v_fshr_i7(i7 %lhs, i7 %rhs, i7 %amt) { 136; GFX6-LABEL: v_fshr_i7: 137; GFX6: ; %bb.0: 138; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 139; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v3, 7 140; GFX6-NEXT: v_rcp_iflag_f32_e32 v3, v3 141; GFX6-NEXT: v_and_b32_e32 v2, 0x7f, v2 142; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 143; GFX6-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 
144; GFX6-NEXT: v_cvt_u32_f32_e32 v3, v3 145; GFX6-NEXT: v_mul_lo_u32 v4, -7, v3 146; GFX6-NEXT: v_mul_hi_u32 v4, v3, v4 147; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v4 148; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3 149; GFX6-NEXT: v_mov_b32_e32 v4, 0x7f 150; GFX6-NEXT: v_and_b32_e32 v1, v1, v4 151; GFX6-NEXT: v_mul_lo_u32 v3, v3, 7 152; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3 153; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 7, v2 154; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 155; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 156; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 7, v2 157; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 158; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 159; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 6, v2 160; GFX6-NEXT: v_and_b32_e32 v2, v2, v4 161; GFX6-NEXT: v_and_b32_e32 v3, v3, v4 162; GFX6-NEXT: v_lshlrev_b32_e32 v0, v3, v0 163; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1 164; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 165; GFX6-NEXT: s_setpc_b64 s[30:31] 166; 167; GFX8-LABEL: v_fshr_i7: 168; GFX8: ; %bb.0: 169; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 170; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v3, 7 171; GFX8-NEXT: v_rcp_iflag_f32_e32 v3, v3 172; GFX8-NEXT: v_and_b32_e32 v2, 0x7f, v2 173; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0 174; GFX8-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 175; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v3 176; GFX8-NEXT: v_mul_lo_u32 v4, -7, v3 177; GFX8-NEXT: v_mul_hi_u32 v4, v3, v4 178; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v4 179; GFX8-NEXT: v_mul_hi_u32 v3, v2, v3 180; GFX8-NEXT: v_mov_b32_e32 v4, 0x7f 181; GFX8-NEXT: v_and_b32_e32 v1, v1, v4 182; GFX8-NEXT: v_mul_lo_u32 v3, v3, 7 183; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v3 184; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 7, v2 185; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 186; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 187; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 7, v2 188; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 189; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 190; GFX8-NEXT: v_sub_u16_e32 v3, 6, v2 191; GFX8-NEXT: 
v_and_b32_e32 v2, v2, v4 192; GFX8-NEXT: v_and_b32_e32 v3, v3, v4 193; GFX8-NEXT: v_lshlrev_b16_e32 v0, v3, v0 194; GFX8-NEXT: v_lshrrev_b16_e32 v1, v2, v1 195; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 196; GFX8-NEXT: s_setpc_b64 s[30:31] 197; 198; GFX9-LABEL: v_fshr_i7: 199; GFX9: ; %bb.0: 200; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 201; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v3, 7 202; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v3 203; GFX9-NEXT: v_and_b32_e32 v2, 0x7f, v2 204; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0 205; GFX9-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 206; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v3 207; GFX9-NEXT: v_mul_lo_u32 v4, -7, v3 208; GFX9-NEXT: v_mul_hi_u32 v4, v3, v4 209; GFX9-NEXT: v_add_u32_e32 v3, v3, v4 210; GFX9-NEXT: v_mul_hi_u32 v3, v2, v3 211; GFX9-NEXT: v_mov_b32_e32 v4, 0x7f 212; GFX9-NEXT: v_and_b32_e32 v1, v1, v4 213; GFX9-NEXT: v_mul_lo_u32 v3, v3, 7 214; GFX9-NEXT: v_sub_u32_e32 v2, v2, v3 215; GFX9-NEXT: v_subrev_u32_e32 v3, 7, v2 216; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 217; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 218; GFX9-NEXT: v_subrev_u32_e32 v3, 7, v2 219; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 220; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 221; GFX9-NEXT: v_sub_u16_e32 v3, 6, v2 222; GFX9-NEXT: v_and_b32_e32 v2, v2, v4 223; GFX9-NEXT: v_and_b32_e32 v3, v3, v4 224; GFX9-NEXT: v_lshlrev_b16_e32 v0, v3, v0 225; GFX9-NEXT: v_lshrrev_b16_e32 v1, v2, v1 226; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 227; GFX9-NEXT: s_setpc_b64 s[30:31] 228; 229; GFX10-LABEL: v_fshr_i7: 230; GFX10: ; %bb.0: 231; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 232; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 233; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v3, 7 234; GFX10-NEXT: v_and_b32_e32 v2, 0x7f, v2 235; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 236; GFX10-NEXT: v_and_b32_e32 v1, 0x7f, v1 237; GFX10-NEXT: v_rcp_iflag_f32_e32 v3, v3 238; GFX10-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 239; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v3 240; GFX10-NEXT: v_mul_lo_u32 v4, -7, v3 241; 
GFX10-NEXT: v_mul_hi_u32 v4, v3, v4 242; GFX10-NEXT: v_add_nc_u32_e32 v3, v3, v4 243; GFX10-NEXT: v_mul_hi_u32 v3, v2, v3 244; GFX10-NEXT: v_mul_lo_u32 v3, v3, 7 245; GFX10-NEXT: v_sub_nc_u32_e32 v2, v2, v3 246; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 7, v2 247; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2 248; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo 249; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 7, v2 250; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2 251; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo 252; GFX10-NEXT: v_mov_b32_e32 v3, 0x7f 253; GFX10-NEXT: v_sub_nc_u16 v4, 6, v2 254; GFX10-NEXT: v_and_b32_e32 v2, v2, v3 255; GFX10-NEXT: v_and_b32_e32 v4, v4, v3 256; GFX10-NEXT: v_lshrrev_b16 v1, v2, v1 257; GFX10-NEXT: v_lshlrev_b16 v0, v4, v0 258; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 259; GFX10-NEXT: s_setpc_b64 s[30:31] 260 %result = call i7 @llvm.fshr.i7(i7 %lhs, i7 %rhs, i7 %amt) 261 ret i7 %result 262} 263 264define amdgpu_ps i8 @s_fshr_i8(i8 inreg %lhs, i8 inreg %rhs, i8 inreg %amt) { 265; GFX6-LABEL: s_fshr_i8: 266; GFX6: ; %bb.0: 267; GFX6-NEXT: s_and_b32 s3, s2, 7 268; GFX6-NEXT: s_andn2_b32 s2, 7, s2 269; GFX6-NEXT: s_lshl_b32 s0, s0, 1 270; GFX6-NEXT: s_and_b32 s1, s1, 0xff 271; GFX6-NEXT: s_lshl_b32 s0, s0, s2 272; GFX6-NEXT: s_lshr_b32 s1, s1, s3 273; GFX6-NEXT: s_or_b32 s0, s0, s1 274; GFX6-NEXT: ; return to shader part epilog 275; 276; GFX8-LABEL: s_fshr_i8: 277; GFX8: ; %bb.0: 278; GFX8-NEXT: s_and_b32 s1, s1, 0xff 279; GFX8-NEXT: s_and_b32 s3, s2, 7 280; GFX8-NEXT: s_andn2_b32 s2, 7, s2 281; GFX8-NEXT: s_lshl_b32 s0, s0, 1 282; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 283; GFX8-NEXT: s_lshl_b32 s0, s0, s2 284; GFX8-NEXT: s_lshr_b32 s1, s1, s3 285; GFX8-NEXT: s_or_b32 s0, s0, s1 286; GFX8-NEXT: ; return to shader part epilog 287; 288; GFX9-LABEL: s_fshr_i8: 289; GFX9: ; %bb.0: 290; GFX9-NEXT: s_and_b32 s1, s1, 0xff 291; GFX9-NEXT: s_and_b32 s3, s2, 7 292; GFX9-NEXT: s_andn2_b32 s2, 7, s2 293; GFX9-NEXT: s_lshl_b32 s0, s0, 1 294; GFX9-NEXT: s_bfe_u32 s1, 
s1, 0x100000 295; GFX9-NEXT: s_lshl_b32 s0, s0, s2 296; GFX9-NEXT: s_lshr_b32 s1, s1, s3 297; GFX9-NEXT: s_or_b32 s0, s0, s1 298; GFX9-NEXT: ; return to shader part epilog 299; 300; GFX10-LABEL: s_fshr_i8: 301; GFX10: ; %bb.0: 302; GFX10-NEXT: s_and_b32 s1, s1, 0xff 303; GFX10-NEXT: s_and_b32 s3, s2, 7 304; GFX10-NEXT: s_andn2_b32 s2, 7, s2 305; GFX10-NEXT: s_lshl_b32 s0, s0, 1 306; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 307; GFX10-NEXT: s_lshl_b32 s0, s0, s2 308; GFX10-NEXT: s_lshr_b32 s1, s1, s3 309; GFX10-NEXT: s_or_b32 s0, s0, s1 310; GFX10-NEXT: ; return to shader part epilog 311 %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 %amt) 312 ret i8 %result 313} 314 315define i8 @v_fshr_i8(i8 %lhs, i8 %rhs, i8 %amt) { 316; GFX6-LABEL: v_fshr_i8: 317; GFX6: ; %bb.0: 318; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 319; GFX6-NEXT: v_and_b32_e32 v3, 7, v2 320; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2 321; GFX6-NEXT: v_and_b32_e32 v2, 7, v2 322; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 323; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v1 324; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0 325; GFX6-NEXT: v_lshrrev_b32_e32 v1, v3, v1 326; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 327; GFX6-NEXT: s_setpc_b64 s[30:31] 328; 329; GFX8-LABEL: v_fshr_i8: 330; GFX8: ; %bb.0: 331; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 332; GFX8-NEXT: v_and_b32_e32 v3, 7, v2 333; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2 334; GFX8-NEXT: v_and_b32_e32 v2, 7, v2 335; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0 336; GFX8-NEXT: v_lshlrev_b16_e32 v0, v2, v0 337; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 338; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 339; GFX8-NEXT: s_setpc_b64 s[30:31] 340; 341; GFX9-LABEL: v_fshr_i8: 342; GFX9: ; %bb.0: 343; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 344; GFX9-NEXT: v_and_b32_e32 v3, 7, v2 345; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2 346; GFX9-NEXT: v_and_b32_e32 v2, 7, v2 347; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0 348; 
GFX9-NEXT: v_lshlrev_b16_e32 v0, v2, v0 349; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 350; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 351; GFX9-NEXT: s_setpc_b64 s[30:31] 352; 353; GFX10-LABEL: v_fshr_i8: 354; GFX10: ; %bb.0: 355; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 356; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 357; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2 358; GFX10-NEXT: v_and_b32_e32 v2, 7, v2 359; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 360; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 361; GFX10-NEXT: v_and_b32_e32 v3, 7, v3 362; GFX10-NEXT: v_lshrrev_b16 v1, v2, v1 363; GFX10-NEXT: v_lshlrev_b16 v0, v3, v0 364; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 365; GFX10-NEXT: s_setpc_b64 s[30:31] 366 %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 %amt) 367 ret i8 %result 368} 369 370define amdgpu_ps i8 @s_fshr_i8_4(i8 inreg %lhs, i8 inreg %rhs) { 371; GFX6-LABEL: s_fshr_i8_4: 372; GFX6: ; %bb.0: 373; GFX6-NEXT: s_lshl_b32 s0, s0, 4 374; GFX6-NEXT: s_bfe_u32 s1, s1, 0x40004 375; GFX6-NEXT: s_or_b32 s0, s0, s1 376; GFX6-NEXT: ; return to shader part epilog 377; 378; GFX8-LABEL: s_fshr_i8_4: 379; GFX8: ; %bb.0: 380; GFX8-NEXT: s_and_b32 s1, s1, 0xff 381; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 382; GFX8-NEXT: s_lshl_b32 s0, s0, 4 383; GFX8-NEXT: s_lshr_b32 s1, s1, 4 384; GFX8-NEXT: s_or_b32 s0, s0, s1 385; GFX8-NEXT: ; return to shader part epilog 386; 387; GFX9-LABEL: s_fshr_i8_4: 388; GFX9: ; %bb.0: 389; GFX9-NEXT: s_and_b32 s1, s1, 0xff 390; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 391; GFX9-NEXT: s_lshl_b32 s0, s0, 4 392; GFX9-NEXT: s_lshr_b32 s1, s1, 4 393; GFX9-NEXT: s_or_b32 s0, s0, s1 394; GFX9-NEXT: ; return to shader part epilog 395; 396; GFX10-LABEL: s_fshr_i8_4: 397; GFX10: ; %bb.0: 398; GFX10-NEXT: s_and_b32 s1, s1, 0xff 399; GFX10-NEXT: s_lshl_b32 s0, s0, 4 400; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 401; GFX10-NEXT: s_lshr_b32 s1, s1, 4 402; GFX10-NEXT: s_or_b32 s0, s0, s1 403; GFX10-NEXT: ; return to 
shader part epilog 404 %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 4) 405 ret i8 %result 406} 407 408define i8 @v_fshr_i8_4(i8 %lhs, i8 %rhs) { 409; GFX6-LABEL: v_fshr_i8_4: 410; GFX6: ; %bb.0: 411; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 412; GFX6-NEXT: v_lshlrev_b32_e32 v0, 4, v0 413; GFX6-NEXT: v_bfe_u32 v1, v1, 4, 4 414; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 415; GFX6-NEXT: s_setpc_b64 s[30:31] 416; 417; GFX8-LABEL: v_fshr_i8_4: 418; GFX8: ; %bb.0: 419; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 420; GFX8-NEXT: v_mov_b32_e32 v2, 4 421; GFX8-NEXT: v_lshlrev_b16_e32 v0, 4, v0 422; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 423; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 424; GFX8-NEXT: s_setpc_b64 s[30:31] 425; 426; GFX9-LABEL: v_fshr_i8_4: 427; GFX9: ; %bb.0: 428; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 429; GFX9-NEXT: s_mov_b32 s4, 4 430; GFX9-NEXT: v_lshlrev_b16_e32 v0, 4, v0 431; GFX9-NEXT: v_lshrrev_b16_sdwa v1, s4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 432; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 433; GFX9-NEXT: s_setpc_b64 s[30:31] 434; 435; GFX10-LABEL: v_fshr_i8_4: 436; GFX10: ; %bb.0: 437; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 438; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 439; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 440; GFX10-NEXT: v_lshlrev_b16 v0, 4, v0 441; GFX10-NEXT: v_lshrrev_b16 v1, 4, v1 442; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 443; GFX10-NEXT: s_setpc_b64 s[30:31] 444 %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 4) 445 ret i8 %result 446} 447 448define amdgpu_ps i8 @s_fshr_i8_5(i8 inreg %lhs, i8 inreg %rhs) { 449; GFX6-LABEL: s_fshr_i8_5: 450; GFX6: ; %bb.0: 451; GFX6-NEXT: s_lshl_b32 s0, s0, 3 452; GFX6-NEXT: s_bfe_u32 s1, s1, 0x30005 453; GFX6-NEXT: s_or_b32 s0, s0, s1 454; GFX6-NEXT: ; return to shader part epilog 455; 456; GFX8-LABEL: s_fshr_i8_5: 457; GFX8: ; %bb.0: 458; GFX8-NEXT: s_and_b32 s1, s1, 
0xff 459; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 460; GFX8-NEXT: s_lshl_b32 s0, s0, 3 461; GFX8-NEXT: s_lshr_b32 s1, s1, 5 462; GFX8-NEXT: s_or_b32 s0, s0, s1 463; GFX8-NEXT: ; return to shader part epilog 464; 465; GFX9-LABEL: s_fshr_i8_5: 466; GFX9: ; %bb.0: 467; GFX9-NEXT: s_and_b32 s1, s1, 0xff 468; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 469; GFX9-NEXT: s_lshl_b32 s0, s0, 3 470; GFX9-NEXT: s_lshr_b32 s1, s1, 5 471; GFX9-NEXT: s_or_b32 s0, s0, s1 472; GFX9-NEXT: ; return to shader part epilog 473; 474; GFX10-LABEL: s_fshr_i8_5: 475; GFX10: ; %bb.0: 476; GFX10-NEXT: s_and_b32 s1, s1, 0xff 477; GFX10-NEXT: s_lshl_b32 s0, s0, 3 478; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 479; GFX10-NEXT: s_lshr_b32 s1, s1, 5 480; GFX10-NEXT: s_or_b32 s0, s0, s1 481; GFX10-NEXT: ; return to shader part epilog 482 %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 5) 483 ret i8 %result 484} 485 486define i8 @v_fshr_i8_5(i8 %lhs, i8 %rhs) { 487; GFX6-LABEL: v_fshr_i8_5: 488; GFX6: ; %bb.0: 489; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 490; GFX6-NEXT: v_lshlrev_b32_e32 v0, 3, v0 491; GFX6-NEXT: v_bfe_u32 v1, v1, 5, 3 492; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 493; GFX6-NEXT: s_setpc_b64 s[30:31] 494; 495; GFX8-LABEL: v_fshr_i8_5: 496; GFX8: ; %bb.0: 497; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 498; GFX8-NEXT: v_mov_b32_e32 v2, 5 499; GFX8-NEXT: v_lshlrev_b16_e32 v0, 3, v0 500; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 501; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 502; GFX8-NEXT: s_setpc_b64 s[30:31] 503; 504; GFX9-LABEL: v_fshr_i8_5: 505; GFX9: ; %bb.0: 506; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 507; GFX9-NEXT: v_mov_b32_e32 v2, 5 508; GFX9-NEXT: v_lshlrev_b16_e32 v0, 3, v0 509; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 510; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 511; GFX9-NEXT: s_setpc_b64 s[30:31] 512; 513; GFX10-LABEL: v_fshr_i8_5: 
514; GFX10: ; %bb.0: 515; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 516; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 517; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 518; GFX10-NEXT: v_lshlrev_b16 v0, 3, v0 519; GFX10-NEXT: v_lshrrev_b16 v1, 5, v1 520; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 521; GFX10-NEXT: s_setpc_b64 s[30:31] 522 %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 5) 523 ret i8 %result 524} 525 526define amdgpu_ps i16 @s_fshr_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg, i16 inreg %amt.arg) { 527; GFX6-LABEL: s_fshr_v2i8: 528; GFX6: ; %bb.0: 529; GFX6-NEXT: s_lshr_b32 s3, s0, 8 530; GFX6-NEXT: s_lshr_b32 s4, s2, 8 531; GFX6-NEXT: s_and_b32 s5, s2, 7 532; GFX6-NEXT: s_andn2_b32 s2, 7, s2 533; GFX6-NEXT: s_movk_i32 s6, 0xff 534; GFX6-NEXT: s_lshl_b32 s0, s0, 1 535; GFX6-NEXT: s_lshl_b32 s0, s0, s2 536; GFX6-NEXT: s_and_b32 s2, s1, s6 537; GFX6-NEXT: s_lshr_b32 s2, s2, s5 538; GFX6-NEXT: s_or_b32 s0, s0, s2 539; GFX6-NEXT: s_and_b32 s2, s4, 7 540; GFX6-NEXT: s_andn2_b32 s4, 7, s4 541; GFX6-NEXT: s_lshl_b32 s3, s3, 1 542; GFX6-NEXT: s_bfe_u32 s1, s1, 0x80008 543; GFX6-NEXT: s_lshl_b32 s3, s3, s4 544; GFX6-NEXT: s_lshr_b32 s1, s1, s2 545; GFX6-NEXT: s_or_b32 s1, s3, s1 546; GFX6-NEXT: s_and_b32 s1, s1, s6 547; GFX6-NEXT: s_and_b32 s0, s0, s6 548; GFX6-NEXT: s_lshl_b32 s1, s1, 8 549; GFX6-NEXT: s_or_b32 s0, s0, s1 550; GFX6-NEXT: ; return to shader part epilog 551; 552; GFX8-LABEL: s_fshr_v2i8: 553; GFX8: ; %bb.0: 554; GFX8-NEXT: s_lshr_b32 s3, s0, 8 555; GFX8-NEXT: s_lshr_b32 s5, s2, 8 556; GFX8-NEXT: s_and_b32 s6, s2, 7 557; GFX8-NEXT: s_andn2_b32 s2, 7, s2 558; GFX8-NEXT: s_lshl_b32 s0, s0, 1 559; GFX8-NEXT: s_lshl_b32 s0, s0, s2 560; GFX8-NEXT: s_movk_i32 s2, 0xff 561; GFX8-NEXT: s_lshr_b32 s4, s1, 8 562; GFX8-NEXT: s_and_b32 s1, s1, s2 563; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 564; GFX8-NEXT: s_lshr_b32 s1, s1, s6 565; GFX8-NEXT: s_and_b32 s4, s4, s2 566; GFX8-NEXT: s_or_b32 s0, s0, s1 567; GFX8-NEXT: s_and_b32 s1, s5, 7 568; GFX8-NEXT: s_andn2_b32 
s5, 7, s5 569; GFX8-NEXT: s_lshl_b32 s3, s3, 1 570; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000 571; GFX8-NEXT: s_lshl_b32 s3, s3, s5 572; GFX8-NEXT: s_lshr_b32 s1, s4, s1 573; GFX8-NEXT: s_or_b32 s1, s3, s1 574; GFX8-NEXT: s_and_b32 s0, s0, s2 575; GFX8-NEXT: s_and_b32 s1, s1, s2 576; GFX8-NEXT: s_bfe_u32 s2, 8, 0x100000 577; GFX8-NEXT: s_lshl_b32 s1, s1, s2 578; GFX8-NEXT: s_or_b32 s0, s0, s1 579; GFX8-NEXT: ; return to shader part epilog 580; 581; GFX9-LABEL: s_fshr_v2i8: 582; GFX9: ; %bb.0: 583; GFX9-NEXT: s_lshr_b32 s3, s0, 8 584; GFX9-NEXT: s_lshr_b32 s5, s2, 8 585; GFX9-NEXT: s_and_b32 s6, s2, 7 586; GFX9-NEXT: s_andn2_b32 s2, 7, s2 587; GFX9-NEXT: s_lshl_b32 s0, s0, 1 588; GFX9-NEXT: s_lshl_b32 s0, s0, s2 589; GFX9-NEXT: s_movk_i32 s2, 0xff 590; GFX9-NEXT: s_lshr_b32 s4, s1, 8 591; GFX9-NEXT: s_and_b32 s1, s1, s2 592; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 593; GFX9-NEXT: s_lshr_b32 s1, s1, s6 594; GFX9-NEXT: s_and_b32 s4, s4, s2 595; GFX9-NEXT: s_or_b32 s0, s0, s1 596; GFX9-NEXT: s_and_b32 s1, s5, 7 597; GFX9-NEXT: s_andn2_b32 s5, 7, s5 598; GFX9-NEXT: s_lshl_b32 s3, s3, 1 599; GFX9-NEXT: s_bfe_u32 s4, s4, 0x100000 600; GFX9-NEXT: s_lshl_b32 s3, s3, s5 601; GFX9-NEXT: s_lshr_b32 s1, s4, s1 602; GFX9-NEXT: s_or_b32 s1, s3, s1 603; GFX9-NEXT: s_and_b32 s0, s0, s2 604; GFX9-NEXT: s_and_b32 s1, s1, s2 605; GFX9-NEXT: s_bfe_u32 s2, 8, 0x100000 606; GFX9-NEXT: s_lshl_b32 s1, s1, s2 607; GFX9-NEXT: s_or_b32 s0, s0, s1 608; GFX9-NEXT: ; return to shader part epilog 609; 610; GFX10-LABEL: s_fshr_v2i8: 611; GFX10: ; %bb.0: 612; GFX10-NEXT: s_lshr_b32 s4, s1, 8 613; GFX10-NEXT: s_movk_i32 s7, 0xff 614; GFX10-NEXT: s_lshr_b32 s3, s0, 8 615; GFX10-NEXT: s_lshr_b32 s5, s2, 8 616; GFX10-NEXT: s_and_b32 s6, s2, 7 617; GFX10-NEXT: s_andn2_b32 s2, 7, s2 618; GFX10-NEXT: s_lshl_b32 s0, s0, 1 619; GFX10-NEXT: s_and_b32 s4, s4, s7 620; GFX10-NEXT: s_and_b32 s1, s1, s7 621; GFX10-NEXT: s_lshl_b32 s0, s0, s2 622; GFX10-NEXT: s_and_b32 s2, s5, 7 623; GFX10-NEXT: s_andn2_b32 s5, 7, s5 
624; GFX10-NEXT: s_lshl_b32 s3, s3, 1 625; GFX10-NEXT: s_bfe_u32 s4, s4, 0x100000 626; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 627; GFX10-NEXT: s_lshl_b32 s3, s3, s5 628; GFX10-NEXT: s_lshr_b32 s2, s4, s2 629; GFX10-NEXT: s_lshr_b32 s1, s1, s6 630; GFX10-NEXT: s_or_b32 s2, s3, s2 631; GFX10-NEXT: s_or_b32 s0, s0, s1 632; GFX10-NEXT: s_and_b32 s1, s2, s7 633; GFX10-NEXT: s_bfe_u32 s2, 8, 0x100000 634; GFX10-NEXT: s_and_b32 s0, s0, s7 635; GFX10-NEXT: s_lshl_b32 s1, s1, s2 636; GFX10-NEXT: s_or_b32 s0, s0, s1 637; GFX10-NEXT: ; return to shader part epilog 638 %lhs = bitcast i16 %lhs.arg to <2 x i8> 639 %rhs = bitcast i16 %rhs.arg to <2 x i8> 640 %amt = bitcast i16 %amt.arg to <2 x i8> 641 %result = call <2 x i8> @llvm.fshr.v2i8(<2 x i8> %lhs, <2 x i8> %rhs, <2 x i8> %amt) 642 %cast.result = bitcast <2 x i8> %result to i16 643 ret i16 %cast.result 644} 645 646define i16 @v_fshr_v2i8(i16 %lhs.arg, i16 %rhs.arg, i16 %amt.arg) { 647; GFX6-LABEL: v_fshr_v2i8: 648; GFX6: ; %bb.0: 649; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 650; GFX6-NEXT: v_lshrrev_b32_e32 v4, 8, v2 651; GFX6-NEXT: v_and_b32_e32 v5, 7, v2 652; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2 653; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v0 654; GFX6-NEXT: v_and_b32_e32 v2, 7, v2 655; GFX6-NEXT: s_movk_i32 s4, 0xff 656; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 657; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0 658; GFX6-NEXT: v_and_b32_e32 v2, s4, v1 659; GFX6-NEXT: v_lshrrev_b32_e32 v2, v5, v2 660; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 661; GFX6-NEXT: v_and_b32_e32 v2, 7, v4 662; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4 663; GFX6-NEXT: v_and_b32_e32 v4, 7, v4 664; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3 665; GFX6-NEXT: v_bfe_u32 v1, v1, 8, 8 666; GFX6-NEXT: v_lshlrev_b32_e32 v3, v4, v3 667; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1 668; GFX6-NEXT: v_or_b32_e32 v1, v3, v1 669; GFX6-NEXT: v_and_b32_e32 v1, s4, v1 670; GFX6-NEXT: v_and_b32_e32 v0, 0xff, v0 671; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1 672; GFX6-NEXT: v_or_b32_e32 v0, 
v0, v1 673; GFX6-NEXT: s_setpc_b64 s[30:31] 674; 675; GFX8-LABEL: v_fshr_v2i8: 676; GFX8: ; %bb.0: 677; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 678; GFX8-NEXT: v_lshrrev_b32_e32 v5, 8, v2 679; GFX8-NEXT: v_and_b32_e32 v6, 7, v2 680; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2 681; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 682; GFX8-NEXT: v_and_b32_e32 v2, 7, v2 683; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0 684; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v1 685; GFX8-NEXT: v_lshlrev_b16_e32 v0, v2, v0 686; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 687; GFX8-NEXT: v_xor_b32_e32 v2, -1, v5 688; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 689; GFX8-NEXT: v_and_b32_e32 v1, 7, v5 690; GFX8-NEXT: v_and_b32_e32 v2, 7, v2 691; GFX8-NEXT: v_lshlrev_b16_e32 v3, 1, v3 692; GFX8-NEXT: v_lshlrev_b16_e32 v2, v2, v3 693; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 694; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 695; GFX8-NEXT: v_and_b32_e32 v1, 0xff, v1 696; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1 697; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 698; GFX8-NEXT: s_setpc_b64 s[30:31] 699; 700; GFX9-LABEL: v_fshr_v2i8: 701; GFX9: ; %bb.0: 702; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 703; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v2 704; GFX9-NEXT: v_and_b32_e32 v6, 7, v2 705; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2 706; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v0 707; GFX9-NEXT: v_and_b32_e32 v2, 7, v2 708; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0 709; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1 710; GFX9-NEXT: v_lshlrev_b16_e32 v0, v2, v0 711; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 712; GFX9-NEXT: v_xor_b32_e32 v2, -1, v5 713; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 714; GFX9-NEXT: v_and_b32_e32 v1, 7, v5 715; GFX9-NEXT: v_and_b32_e32 v2, 7, v2 716; GFX9-NEXT: 
v_lshlrev_b16_e32 v3, 1, v3 717; GFX9-NEXT: v_lshlrev_b16_e32 v2, v2, v3 718; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 719; GFX9-NEXT: v_or_b32_e32 v1, v2, v1 720; GFX9-NEXT: v_and_b32_e32 v1, 0xff, v1 721; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1 722; GFX9-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 723; GFX9-NEXT: s_setpc_b64 s[30:31] 724; 725; GFX10-LABEL: v_fshr_v2i8: 726; GFX10: ; %bb.0: 727; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 728; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 729; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v2 730; GFX10-NEXT: v_lshrrev_b32_e32 v4, 8, v0 731; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v1 732; GFX10-NEXT: s_movk_i32 s4, 0xff 733; GFX10-NEXT: v_and_b32_e32 v7, 7, v2 734; GFX10-NEXT: v_xor_b32_e32 v6, -1, v3 735; GFX10-NEXT: v_xor_b32_e32 v2, -1, v2 736; GFX10-NEXT: v_and_b32_e32 v3, 7, v3 737; GFX10-NEXT: v_lshlrev_b16 v4, 1, v4 738; GFX10-NEXT: v_and_b32_e32 v5, s4, v5 739; GFX10-NEXT: v_and_b32_e32 v6, 7, v6 740; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 741; GFX10-NEXT: v_and_b32_e32 v1, s4, v1 742; GFX10-NEXT: v_and_b32_e32 v2, 7, v2 743; GFX10-NEXT: v_lshrrev_b16 v3, v3, v5 744; GFX10-NEXT: v_lshlrev_b16 v4, v6, v4 745; GFX10-NEXT: v_lshrrev_b16 v1, v7, v1 746; GFX10-NEXT: v_lshlrev_b16 v0, v2, v0 747; GFX10-NEXT: v_or_b32_e32 v2, v4, v3 748; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 749; GFX10-NEXT: v_and_b32_sdwa v1, v2, s4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 750; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 751; GFX10-NEXT: s_setpc_b64 s[30:31] 752 %lhs = bitcast i16 %lhs.arg to <2 x i8> 753 %rhs = bitcast i16 %rhs.arg to <2 x i8> 754 %amt = bitcast i16 %amt.arg to <2 x i8> 755 %result = call <2 x i8> @llvm.fshr.v2i8(<2 x i8> %lhs, <2 x i8> %rhs, <2 x i8> %amt) 756 %cast.result = bitcast <2 x i8> %result to i16 757 ret i16 
%cast.result 758} 759 760define amdgpu_ps i32 @s_fshr_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg, i32 inreg %amt.arg) { 761; GFX6-LABEL: s_fshr_v4i8: 762; GFX6: ; %bb.0: 763; GFX6-NEXT: s_lshr_b32 s3, s0, 8 764; GFX6-NEXT: s_lshr_b32 s4, s0, 16 765; GFX6-NEXT: s_lshr_b32 s5, s0, 24 766; GFX6-NEXT: s_lshr_b32 s7, s2, 8 767; GFX6-NEXT: s_lshr_b32 s8, s2, 16 768; GFX6-NEXT: s_lshr_b32 s9, s2, 24 769; GFX6-NEXT: s_and_b32 s10, s2, 7 770; GFX6-NEXT: s_andn2_b32 s2, 7, s2 771; GFX6-NEXT: s_movk_i32 s11, 0xff 772; GFX6-NEXT: s_lshl_b32 s0, s0, 1 773; GFX6-NEXT: s_lshl_b32 s0, s0, s2 774; GFX6-NEXT: s_and_b32 s2, s1, s11 775; GFX6-NEXT: s_lshr_b32 s2, s2, s10 776; GFX6-NEXT: s_or_b32 s0, s0, s2 777; GFX6-NEXT: s_and_b32 s2, s7, 7 778; GFX6-NEXT: s_andn2_b32 s7, 7, s7 779; GFX6-NEXT: s_lshl_b32 s3, s3, 1 780; GFX6-NEXT: s_lshl_b32 s3, s3, s7 781; GFX6-NEXT: s_bfe_u32 s7, s1, 0x80008 782; GFX6-NEXT: s_lshr_b32 s2, s7, s2 783; GFX6-NEXT: s_lshr_b32 s6, s1, 24 784; GFX6-NEXT: s_or_b32 s2, s3, s2 785; GFX6-NEXT: s_and_b32 s3, s8, 7 786; GFX6-NEXT: s_andn2_b32 s7, 7, s8 787; GFX6-NEXT: s_lshl_b32 s4, s4, 1 788; GFX6-NEXT: s_bfe_u32 s1, s1, 0x80010 789; GFX6-NEXT: s_lshl_b32 s4, s4, s7 790; GFX6-NEXT: s_lshr_b32 s1, s1, s3 791; GFX6-NEXT: s_or_b32 s1, s4, s1 792; GFX6-NEXT: s_and_b32 s3, s9, 7 793; GFX6-NEXT: s_andn2_b32 s4, 7, s9 794; GFX6-NEXT: s_lshl_b32 s5, s5, 1 795; GFX6-NEXT: s_and_b32 s2, s2, s11 796; GFX6-NEXT: s_lshl_b32 s4, s5, s4 797; GFX6-NEXT: s_lshr_b32 s3, s6, s3 798; GFX6-NEXT: s_and_b32 s0, s0, s11 799; GFX6-NEXT: s_lshl_b32 s2, s2, 8 800; GFX6-NEXT: s_and_b32 s1, s1, s11 801; GFX6-NEXT: s_or_b32 s3, s4, s3 802; GFX6-NEXT: s_or_b32 s0, s0, s2 803; GFX6-NEXT: s_lshl_b32 s1, s1, 16 804; GFX6-NEXT: s_or_b32 s0, s0, s1 805; GFX6-NEXT: s_and_b32 s1, s3, s11 806; GFX6-NEXT: s_lshl_b32 s1, s1, 24 807; GFX6-NEXT: s_or_b32 s0, s0, s1 808; GFX6-NEXT: ; return to shader part epilog 809; 810; GFX8-LABEL: s_fshr_v4i8: 811; GFX8: ; %bb.0: 812; GFX8-NEXT: s_movk_i32 s13, 0xff 
813; GFX8-NEXT: s_lshr_b32 s3, s0, 8 814; GFX8-NEXT: s_lshr_b32 s4, s0, 16 815; GFX8-NEXT: s_lshr_b32 s5, s0, 24 816; GFX8-NEXT: s_lshr_b32 s6, s1, 8 817; GFX8-NEXT: s_lshr_b32 s7, s1, 16 818; GFX8-NEXT: s_lshr_b32 s8, s1, 24 819; GFX8-NEXT: s_lshr_b32 s9, s2, 8 820; GFX8-NEXT: s_lshr_b32 s10, s2, 16 821; GFX8-NEXT: s_lshr_b32 s11, s2, 24 822; GFX8-NEXT: s_and_b32 s12, s2, 7 823; GFX8-NEXT: s_andn2_b32 s2, 7, s2 824; GFX8-NEXT: s_lshl_b32 s0, s0, 1 825; GFX8-NEXT: s_and_b32 s1, s1, s13 826; GFX8-NEXT: s_lshl_b32 s0, s0, s2 827; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 828; GFX8-NEXT: s_andn2_b32 s2, 7, s9 829; GFX8-NEXT: s_lshl_b32 s3, s3, 1 830; GFX8-NEXT: s_lshr_b32 s1, s1, s12 831; GFX8-NEXT: s_lshl_b32 s2, s3, s2 832; GFX8-NEXT: s_and_b32 s3, s6, s13 833; GFX8-NEXT: s_or_b32 s0, s0, s1 834; GFX8-NEXT: s_and_b32 s1, s9, 7 835; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000 836; GFX8-NEXT: s_lshr_b32 s1, s3, s1 837; GFX8-NEXT: s_andn2_b32 s3, 7, s10 838; GFX8-NEXT: s_lshl_b32 s4, s4, 1 839; GFX8-NEXT: s_lshl_b32 s3, s4, s3 840; GFX8-NEXT: s_and_b32 s4, s7, s13 841; GFX8-NEXT: s_or_b32 s1, s2, s1 842; GFX8-NEXT: s_and_b32 s2, s10, 7 843; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000 844; GFX8-NEXT: s_lshr_b32 s2, s4, s2 845; GFX8-NEXT: s_and_b32 s1, s1, s13 846; GFX8-NEXT: s_or_b32 s2, s3, s2 847; GFX8-NEXT: s_and_b32 s3, s11, 7 848; GFX8-NEXT: s_andn2_b32 s4, 7, s11 849; GFX8-NEXT: s_lshl_b32 s5, s5, 1 850; GFX8-NEXT: s_and_b32 s0, s0, s13 851; GFX8-NEXT: s_lshl_b32 s1, s1, 8 852; GFX8-NEXT: s_lshl_b32 s4, s5, s4 853; GFX8-NEXT: s_lshr_b32 s3, s8, s3 854; GFX8-NEXT: s_or_b32 s0, s0, s1 855; GFX8-NEXT: s_and_b32 s1, s2, s13 856; GFX8-NEXT: s_or_b32 s3, s4, s3 857; GFX8-NEXT: s_lshl_b32 s1, s1, 16 858; GFX8-NEXT: s_or_b32 s0, s0, s1 859; GFX8-NEXT: s_and_b32 s1, s3, s13 860; GFX8-NEXT: s_lshl_b32 s1, s1, 24 861; GFX8-NEXT: s_or_b32 s0, s0, s1 862; GFX8-NEXT: ; return to shader part epilog 863; 864; GFX9-LABEL: s_fshr_v4i8: 865; GFX9: ; %bb.0: 866; GFX9-NEXT: s_movk_i32 s13, 0xff 867; 
GFX9-NEXT: s_lshr_b32 s3, s0, 8 868; GFX9-NEXT: s_lshr_b32 s4, s0, 16 869; GFX9-NEXT: s_lshr_b32 s5, s0, 24 870; GFX9-NEXT: s_lshr_b32 s6, s1, 8 871; GFX9-NEXT: s_lshr_b32 s7, s1, 16 872; GFX9-NEXT: s_lshr_b32 s8, s1, 24 873; GFX9-NEXT: s_lshr_b32 s9, s2, 8 874; GFX9-NEXT: s_lshr_b32 s10, s2, 16 875; GFX9-NEXT: s_lshr_b32 s11, s2, 24 876; GFX9-NEXT: s_and_b32 s12, s2, 7 877; GFX9-NEXT: s_andn2_b32 s2, 7, s2 878; GFX9-NEXT: s_lshl_b32 s0, s0, 1 879; GFX9-NEXT: s_and_b32 s1, s1, s13 880; GFX9-NEXT: s_lshl_b32 s0, s0, s2 881; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 882; GFX9-NEXT: s_andn2_b32 s2, 7, s9 883; GFX9-NEXT: s_lshl_b32 s3, s3, 1 884; GFX9-NEXT: s_lshr_b32 s1, s1, s12 885; GFX9-NEXT: s_lshl_b32 s2, s3, s2 886; GFX9-NEXT: s_and_b32 s3, s6, s13 887; GFX9-NEXT: s_or_b32 s0, s0, s1 888; GFX9-NEXT: s_and_b32 s1, s9, 7 889; GFX9-NEXT: s_bfe_u32 s3, s3, 0x100000 890; GFX9-NEXT: s_lshr_b32 s1, s3, s1 891; GFX9-NEXT: s_andn2_b32 s3, 7, s10 892; GFX9-NEXT: s_lshl_b32 s4, s4, 1 893; GFX9-NEXT: s_lshl_b32 s3, s4, s3 894; GFX9-NEXT: s_and_b32 s4, s7, s13 895; GFX9-NEXT: s_or_b32 s1, s2, s1 896; GFX9-NEXT: s_and_b32 s2, s10, 7 897; GFX9-NEXT: s_bfe_u32 s4, s4, 0x100000 898; GFX9-NEXT: s_lshr_b32 s2, s4, s2 899; GFX9-NEXT: s_and_b32 s1, s1, s13 900; GFX9-NEXT: s_or_b32 s2, s3, s2 901; GFX9-NEXT: s_and_b32 s3, s11, 7 902; GFX9-NEXT: s_andn2_b32 s4, 7, s11 903; GFX9-NEXT: s_lshl_b32 s5, s5, 1 904; GFX9-NEXT: s_and_b32 s0, s0, s13 905; GFX9-NEXT: s_lshl_b32 s1, s1, 8 906; GFX9-NEXT: s_lshl_b32 s4, s5, s4 907; GFX9-NEXT: s_lshr_b32 s3, s8, s3 908; GFX9-NEXT: s_or_b32 s0, s0, s1 909; GFX9-NEXT: s_and_b32 s1, s2, s13 910; GFX9-NEXT: s_or_b32 s3, s4, s3 911; GFX9-NEXT: s_lshl_b32 s1, s1, 16 912; GFX9-NEXT: s_or_b32 s0, s0, s1 913; GFX9-NEXT: s_and_b32 s1, s3, s13 914; GFX9-NEXT: s_lshl_b32 s1, s1, 24 915; GFX9-NEXT: s_or_b32 s0, s0, s1 916; GFX9-NEXT: ; return to shader part epilog 917; 918; GFX10-LABEL: s_fshr_v4i8: 919; GFX10: ; %bb.0: 920; GFX10-NEXT: s_lshr_b32 s6, s1, 8 921; 
GFX10-NEXT: s_movk_i32 s13, 0xff 922; GFX10-NEXT: s_lshr_b32 s3, s0, 8 923; GFX10-NEXT: s_lshr_b32 s4, s0, 16 924; GFX10-NEXT: s_lshr_b32 s5, s0, 24 925; GFX10-NEXT: s_lshr_b32 s7, s1, 16 926; GFX10-NEXT: s_lshr_b32 s8, s1, 24 927; GFX10-NEXT: s_lshr_b32 s9, s2, 8 928; GFX10-NEXT: s_lshr_b32 s10, s2, 16 929; GFX10-NEXT: s_lshr_b32 s11, s2, 24 930; GFX10-NEXT: s_and_b32 s12, s2, 7 931; GFX10-NEXT: s_andn2_b32 s2, 7, s2 932; GFX10-NEXT: s_and_b32 s1, s1, s13 933; GFX10-NEXT: s_lshl_b32 s0, s0, 1 934; GFX10-NEXT: s_and_b32 s6, s6, s13 935; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 936; GFX10-NEXT: s_lshl_b32 s0, s0, s2 937; GFX10-NEXT: s_and_b32 s2, s9, 7 938; GFX10-NEXT: s_andn2_b32 s9, 7, s9 939; GFX10-NEXT: s_lshl_b32 s3, s3, 1 940; GFX10-NEXT: s_bfe_u32 s6, s6, 0x100000 941; GFX10-NEXT: s_lshr_b32 s1, s1, s12 942; GFX10-NEXT: s_lshl_b32 s3, s3, s9 943; GFX10-NEXT: s_lshr_b32 s2, s6, s2 944; GFX10-NEXT: s_and_b32 s6, s7, s13 945; GFX10-NEXT: s_or_b32 s0, s0, s1 946; GFX10-NEXT: s_or_b32 s1, s3, s2 947; GFX10-NEXT: s_and_b32 s2, s10, 7 948; GFX10-NEXT: s_andn2_b32 s3, 7, s10 949; GFX10-NEXT: s_lshl_b32 s4, s4, 1 950; GFX10-NEXT: s_bfe_u32 s6, s6, 0x100000 951; GFX10-NEXT: s_lshl_b32 s3, s4, s3 952; GFX10-NEXT: s_lshr_b32 s2, s6, s2 953; GFX10-NEXT: s_andn2_b32 s4, 7, s11 954; GFX10-NEXT: s_lshl_b32 s5, s5, 1 955; GFX10-NEXT: s_and_b32 s6, s11, 7 956; GFX10-NEXT: s_lshl_b32 s4, s5, s4 957; GFX10-NEXT: s_lshr_b32 s5, s8, s6 958; GFX10-NEXT: s_or_b32 s2, s3, s2 959; GFX10-NEXT: s_and_b32 s1, s1, s13 960; GFX10-NEXT: s_or_b32 s3, s4, s5 961; GFX10-NEXT: s_and_b32 s0, s0, s13 962; GFX10-NEXT: s_lshl_b32 s1, s1, 8 963; GFX10-NEXT: s_and_b32 s2, s2, s13 964; GFX10-NEXT: s_or_b32 s0, s0, s1 965; GFX10-NEXT: s_lshl_b32 s1, s2, 16 966; GFX10-NEXT: s_and_b32 s2, s3, s13 967; GFX10-NEXT: s_or_b32 s0, s0, s1 968; GFX10-NEXT: s_lshl_b32 s1, s2, 24 969; GFX10-NEXT: s_or_b32 s0, s0, s1 970; GFX10-NEXT: ; return to shader part epilog 971 %lhs = bitcast i32 %lhs.arg to <4 x i8> 972 %rhs 
= bitcast i32 %rhs.arg to <4 x i8> 973 %amt = bitcast i32 %amt.arg to <4 x i8> 974 %result = call <4 x i8> @llvm.fshr.v4i8(<4 x i8> %lhs, <4 x i8> %rhs, <4 x i8> %amt) 975 %cast.result = bitcast <4 x i8> %result to i32 976 ret i32 %cast.result 977} 978 
; v_fshr_v4i8 - funnel shift right on <4 x i8> values that are passed and
; returned packed in an i32. The IR body bitcasts each i32 argument to
; <4 x i8>, calls @llvm.fshr.v4i8 and bitcasts the result back to i32.
; Unlike the s_fshr_v4i8 expectations that precede it (s-registers, shader
; part epilog), the expected code here works on v-registers, starts with an
; s_waitcnt and returns through s_setpc_b64 s[30:31].
; Per byte lane the expected code uses (amt & 7) as the right-shift count for
; the rhs byte and (~amt & 7) as the left-shift count for the lhs byte after
; it has been shifted left by 1; the four lanes are then masked with 0xff and
; re-packed at bit offsets 0, 8, 16 and 24.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
979define i32 @v_fshr_v4i8(i32 %lhs.arg, i32 %rhs.arg, i32 %amt.arg) { 980; GFX6-LABEL: v_fshr_v4i8: 981; GFX6: ; %bb.0: 982; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 983; GFX6-NEXT: v_lshrrev_b32_e32 v7, 8, v2 984; GFX6-NEXT: v_lshrrev_b32_e32 v8, 16, v2 985; GFX6-NEXT: v_lshrrev_b32_e32 v9, 24, v2 986; GFX6-NEXT: v_and_b32_e32 v10, 7, v2 987; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2 988; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v0 989; GFX6-NEXT: v_lshrrev_b32_e32 v4, 16, v0 990; GFX6-NEXT: v_lshrrev_b32_e32 v5, 24, v0 991; GFX6-NEXT: v_and_b32_e32 v2, 7, v2 992; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 993; GFX6-NEXT: v_and_b32_e32 v11, 0xff, v1 994; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0 995; GFX6-NEXT: v_lshrrev_b32_e32 v10, v10, v11 996; GFX6-NEXT: v_or_b32_e32 v0, v0, v10 997; GFX6-NEXT: v_and_b32_e32 v10, 7, v7 998; GFX6-NEXT: v_xor_b32_e32 v7, -1, v7 999; GFX6-NEXT: v_and_b32_e32 v7, 7, v7 1000; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3 1001; GFX6-NEXT: v_lshlrev_b32_e32 v3, v7, v3 1002; GFX6-NEXT: v_bfe_u32 v7, v1, 8, 8 1003; GFX6-NEXT: v_lshrrev_b32_e32 v7, v10, v7 1004; GFX6-NEXT: v_or_b32_e32 v3, v3, v7 1005; GFX6-NEXT: v_and_b32_e32 v7, 7, v8 1006; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8 1007; GFX6-NEXT: v_lshrrev_b32_e32 v6, 24, v1 1008; GFX6-NEXT: v_and_b32_e32 v8, 7, v8 1009; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v4 1010; GFX6-NEXT: v_bfe_u32 v1, v1, 16, 8 1011; GFX6-NEXT: v_mov_b32_e32 v2, 0xff 1012; GFX6-NEXT: v_lshlrev_b32_e32 v4, v8, v4 1013; GFX6-NEXT: v_lshrrev_b32_e32 v1, v7, v1 1014; GFX6-NEXT: v_xor_b32_e32 v7, -1, v9 1015; GFX6-NEXT: v_or_b32_e32 v1, v4, v1 1016; GFX6-NEXT: v_and_b32_e32 v4, 7, v9 1017; GFX6-NEXT: v_and_b32_e32 v7, 7, v7 1018; GFX6-NEXT: v_lshlrev_b32_e32 v5, 1, v5 1019; GFX6-NEXT: 
v_and_b32_e32 v3, v3, v2 1020; GFX6-NEXT: v_lshlrev_b32_e32 v5, v7, v5 1021; GFX6-NEXT: v_lshrrev_b32_e32 v4, v4, v6 1022; GFX6-NEXT: v_and_b32_e32 v0, v0, v2 1023; GFX6-NEXT: v_lshlrev_b32_e32 v3, 8, v3 1024; GFX6-NEXT: v_and_b32_e32 v1, v1, v2 1025; GFX6-NEXT: v_or_b32_e32 v4, v5, v4 1026; GFX6-NEXT: v_or_b32_e32 v0, v0, v3 1027; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 1028; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 1029; GFX6-NEXT: v_and_b32_e32 v1, v4, v2 1030; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1 1031; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 1032; GFX6-NEXT: s_setpc_b64 s[30:31] 1033; 1034; GFX8-LABEL: v_fshr_v4i8: 1035; GFX8: ; %bb.0: 1036; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1037; GFX8-NEXT: v_lshrrev_b32_e32 v5, 8, v2 1038; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v2 1039; GFX8-NEXT: v_lshrrev_b32_e32 v7, 24, v2 1040; GFX8-NEXT: v_and_b32_e32 v8, 7, v2 1041; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2 1042; GFX8-NEXT: v_and_b32_e32 v2, 7, v2 1043; GFX8-NEXT: v_lshlrev_b16_e32 v9, 1, v0 1044; GFX8-NEXT: v_lshlrev_b16_e32 v2, v2, v9 1045; GFX8-NEXT: v_lshrrev_b16_sdwa v8, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1046; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 1047; GFX8-NEXT: v_or_b32_e32 v2, v2, v8 1048; GFX8-NEXT: v_and_b32_e32 v8, 7, v5 1049; GFX8-NEXT: v_xor_b32_e32 v5, -1, v5 1050; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v1 1051; GFX8-NEXT: v_and_b32_e32 v5, 7, v5 1052; GFX8-NEXT: v_lshlrev_b16_e32 v3, 1, v3 1053; GFX8-NEXT: v_lshlrev_b16_e32 v3, v5, v3 1054; GFX8-NEXT: v_lshrrev_b16_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1055; GFX8-NEXT: v_or_b32_e32 v3, v3, v4 1056; GFX8-NEXT: v_and_b32_e32 v4, 7, v6 1057; GFX8-NEXT: v_xor_b32_e32 v5, -1, v6 1058; GFX8-NEXT: v_mov_b32_e32 v6, 1 1059; GFX8-NEXT: v_mov_b32_e32 v9, 0xff 1060; GFX8-NEXT: v_and_b32_e32 v5, 7, v5 1061; GFX8-NEXT: v_lshlrev_b16_sdwa v8, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 1062; GFX8-NEXT: 
v_lshlrev_b16_e32 v5, v5, v8 1063; GFX8-NEXT: v_and_b32_sdwa v8, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1064; GFX8-NEXT: v_lshrrev_b16_e32 v4, v4, v8 1065; GFX8-NEXT: v_or_b32_e32 v4, v5, v4 1066; GFX8-NEXT: v_and_b32_e32 v5, 7, v7 1067; GFX8-NEXT: v_xor_b32_e32 v7, -1, v7 1068; GFX8-NEXT: v_and_b32_e32 v7, 7, v7 1069; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 1070; GFX8-NEXT: v_lshlrev_b16_e32 v0, v7, v0 1071; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 1072; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 1073; GFX8-NEXT: v_mov_b32_e32 v1, 8 1074; GFX8-NEXT: s_movk_i32 s4, 0xff 1075; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1076; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1077; GFX8-NEXT: v_and_b32_e32 v2, s4, v4 1078; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2 1079; GFX8-NEXT: v_and_b32_e32 v0, s4, v0 1080; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 1081; GFX8-NEXT: v_lshlrev_b32_e32 v0, 24, v0 1082; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 1083; GFX8-NEXT: s_setpc_b64 s[30:31] 1084; 1085; GFX9-LABEL: v_fshr_v4i8: 1086; GFX9: ; %bb.0: 1087; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1088; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v2 1089; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v2 1090; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v2 1091; GFX9-NEXT: v_and_b32_e32 v8, 7, v2 1092; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2 1093; GFX9-NEXT: v_and_b32_e32 v2, 7, v2 1094; GFX9-NEXT: v_lshlrev_b16_e32 v9, 1, v0 1095; GFX9-NEXT: v_lshlrev_b16_e32 v2, v2, v9 1096; GFX9-NEXT: v_lshrrev_b16_sdwa v8, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1097; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v0 1098; GFX9-NEXT: v_or_b32_e32 v2, v2, v8 1099; GFX9-NEXT: v_and_b32_e32 v8, 7, v5 1100; GFX9-NEXT: v_xor_b32_e32 
v5, -1, v5 1101; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1 1102; GFX9-NEXT: v_and_b32_e32 v5, 7, v5 1103; GFX9-NEXT: v_lshlrev_b16_e32 v3, 1, v3 1104; GFX9-NEXT: v_lshlrev_b16_e32 v3, v5, v3 1105; GFX9-NEXT: v_lshrrev_b16_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1106; GFX9-NEXT: v_or_b32_e32 v3, v3, v4 1107; GFX9-NEXT: v_and_b32_e32 v4, 7, v6 1108; GFX9-NEXT: v_xor_b32_e32 v5, -1, v6 1109; GFX9-NEXT: v_mov_b32_e32 v6, 1 1110; GFX9-NEXT: v_mov_b32_e32 v9, 0xff 1111; GFX9-NEXT: v_and_b32_e32 v5, 7, v5 1112; GFX9-NEXT: v_lshlrev_b16_sdwa v8, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 1113; GFX9-NEXT: v_lshlrev_b16_e32 v5, v5, v8 1114; GFX9-NEXT: v_and_b32_sdwa v8, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1115; GFX9-NEXT: v_lshrrev_b16_e32 v4, v4, v8 1116; GFX9-NEXT: v_or_b32_e32 v4, v5, v4 1117; GFX9-NEXT: v_and_b32_e32 v5, 7, v7 1118; GFX9-NEXT: v_xor_b32_e32 v7, -1, v7 1119; GFX9-NEXT: v_and_b32_e32 v7, 7, v7 1120; GFX9-NEXT: v_lshlrev_b16_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 1121; GFX9-NEXT: v_lshlrev_b16_e32 v0, v7, v0 1122; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 1123; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 1124; GFX9-NEXT: v_mov_b32_e32 v1, 8 1125; GFX9-NEXT: s_movk_i32 s4, 0xff 1126; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1127; GFX9-NEXT: v_and_or_b32 v1, v2, s4, v1 1128; GFX9-NEXT: v_and_b32_e32 v2, s4, v4 1129; GFX9-NEXT: v_and_b32_e32 v0, s4, v0 1130; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 1131; GFX9-NEXT: v_lshlrev_b32_e32 v0, 24, v0 1132; GFX9-NEXT: v_or3_b32 v0, v1, v2, v0 1133; GFX9-NEXT: s_setpc_b64 s[30:31] 1134; 1135; GFX10-LABEL: v_fshr_v4i8: 1136; GFX10: ; %bb.0: 1137; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1138; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1139; 
GFX10-NEXT: v_lshrrev_b32_e32 v6, 8, v2 1140; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v0 1141; GFX10-NEXT: v_xor_b32_e32 v8, -1, v2 1142; GFX10-NEXT: v_lshrrev_b32_e32 v10, 16, v2 1143; GFX10-NEXT: v_lshrrev_b32_e32 v12, 24, v2 1144; GFX10-NEXT: v_xor_b32_e32 v11, -1, v6 1145; GFX10-NEXT: v_lshlrev_b16 v3, 1, v3 1146; GFX10-NEXT: v_lshrrev_b32_e32 v4, 16, v0 1147; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v0 1148; GFX10-NEXT: v_lshrrev_b32_e32 v7, 8, v1 1149; GFX10-NEXT: v_and_b32_e32 v11, 7, v11 1150; GFX10-NEXT: v_and_b32_e32 v8, 7, v8 1151; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 1152; GFX10-NEXT: v_mov_b32_e32 v13, 0xff 1153; GFX10-NEXT: v_xor_b32_e32 v14, -1, v12 1154; GFX10-NEXT: v_lshlrev_b16 v3, v11, v3 1155; GFX10-NEXT: v_xor_b32_e32 v11, -1, v10 1156; GFX10-NEXT: s_movk_i32 s4, 0xff 1157; GFX10-NEXT: v_lshrrev_b32_e32 v9, 24, v1 1158; GFX10-NEXT: v_lshlrev_b16 v0, v8, v0 1159; GFX10-NEXT: v_and_b32_e32 v8, s4, v1 1160; GFX10-NEXT: v_and_b32_e32 v6, 7, v6 1161; GFX10-NEXT: v_and_b32_e32 v7, s4, v7 1162; GFX10-NEXT: v_and_b32_e32 v10, 7, v10 1163; GFX10-NEXT: v_and_b32_e32 v11, 7, v11 1164; GFX10-NEXT: v_lshlrev_b16 v4, 1, v4 1165; GFX10-NEXT: v_and_b32_sdwa v1, v1, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1166; GFX10-NEXT: v_and_b32_e32 v13, 7, v14 1167; GFX10-NEXT: v_lshlrev_b16 v5, 1, v5 1168; GFX10-NEXT: v_and_b32_e32 v12, 7, v12 1169; GFX10-NEXT: v_and_b32_e32 v2, 7, v2 1170; GFX10-NEXT: v_lshrrev_b16 v6, v6, v7 1171; GFX10-NEXT: v_lshlrev_b16 v4, v11, v4 1172; GFX10-NEXT: v_lshrrev_b16 v1, v10, v1 1173; GFX10-NEXT: v_lshlrev_b16 v5, v13, v5 1174; GFX10-NEXT: v_lshrrev_b16 v7, v12, v9 1175; GFX10-NEXT: v_lshrrev_b16 v2, v2, v8 1176; GFX10-NEXT: v_or_b32_e32 v3, v3, v6 1177; GFX10-NEXT: v_mov_b32_e32 v6, 8 1178; GFX10-NEXT: v_or_b32_e32 v1, v4, v1 1179; GFX10-NEXT: v_or_b32_e32 v4, v5, v7 1180; GFX10-NEXT: v_or_b32_e32 v0, v0, v2 1181; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD 
src1_sel:BYTE_0 1182; GFX10-NEXT: v_and_b32_e32 v1, s4, v1 1183; GFX10-NEXT: v_and_b32_e32 v3, s4, v4 1184; GFX10-NEXT: v_and_or_b32 v0, v0, s4, v2 1185; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v1 1186; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v3 1187; GFX10-NEXT: v_or3_b32 v0, v0, v1, v2 1188; GFX10-NEXT: s_setpc_b64 s[30:31] 1189 %lhs = bitcast i32 %lhs.arg to <4 x i8> 1190 %rhs = bitcast i32 %rhs.arg to <4 x i8> 1191 %amt = bitcast i32 %amt.arg to <4 x i8> 1192 %result = call <4 x i8> @llvm.fshr.v4i8(<4 x i8> %lhs, <4 x i8> %rhs, <4 x i8> %amt) 1193 %cast.result = bitcast <4 x i8> %result to i32 1194 ret i32 %cast.result 1195} 1196 1197define amdgpu_ps i24 @s_fshr_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt) { 1198; GFX6-LABEL: s_fshr_i24: 1199; GFX6: ; %bb.0: 1200; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 1201; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 1202; GFX6-NEXT: v_mov_b32_e32 v1, 0xffffffe8 1203; GFX6-NEXT: s_mov_b32 s3, 0xffffff 1204; GFX6-NEXT: s_and_b32 s2, s2, s3 1205; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1206; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 1207; GFX6-NEXT: s_lshl_b32 s0, s0, 1 1208; GFX6-NEXT: s_and_b32 s1, s1, s3 1209; GFX6-NEXT: v_mul_lo_u32 v1, v1, v0 1210; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 1211; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 1212; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0 1213; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24 1214; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 1215; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 24, v0 1216; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1217; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1218; GFX6-NEXT: v_subrev_i32_e32 v1, vcc, 24, v0 1219; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1220; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1221; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 23, v0 1222; GFX6-NEXT: v_and_b32_e32 v0, s3, v0 1223; GFX6-NEXT: v_and_b32_e32 v1, s3, v1 1224; GFX6-NEXT: v_lshl_b32_e32 v1, s0, v1 1225; GFX6-NEXT: v_lshr_b32_e32 v0, s1, v0 1226; GFX6-NEXT: v_or_b32_e32 v0, v1, v0 1227; GFX6-NEXT: 
v_readfirstlane_b32 s0, v0 1228; GFX6-NEXT: ; return to shader part epilog 1229; 1230; GFX8-LABEL: s_fshr_i24: 1231; GFX8: ; %bb.0: 1232; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 1233; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 1234; GFX8-NEXT: v_mov_b32_e32 v1, 0xffffffe8 1235; GFX8-NEXT: s_mov_b32 s3, 0xffffff 1236; GFX8-NEXT: s_and_b32 s2, s2, s3 1237; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1238; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 1239; GFX8-NEXT: s_lshl_b32 s0, s0, 1 1240; GFX8-NEXT: s_and_b32 s1, s1, s3 1241; GFX8-NEXT: v_mul_lo_u32 v1, v1, v0 1242; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1 1243; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1 1244; GFX8-NEXT: v_mul_hi_u32 v0, s2, v0 1245; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24 1246; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s2, v0 1247; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 24, v0 1248; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1249; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1250; GFX8-NEXT: v_subrev_u32_e32 v1, vcc, 24, v0 1251; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1252; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1253; GFX8-NEXT: v_sub_u32_e32 v1, vcc, 23, v0 1254; GFX8-NEXT: v_and_b32_e32 v0, s3, v0 1255; GFX8-NEXT: v_and_b32_e32 v1, s3, v1 1256; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s0 1257; GFX8-NEXT: v_lshrrev_b32_e64 v0, v0, s1 1258; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 1259; GFX8-NEXT: v_readfirstlane_b32 s0, v0 1260; GFX8-NEXT: ; return to shader part epilog 1261; 1262; GFX9-LABEL: s_fshr_i24: 1263; GFX9: ; %bb.0: 1264; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 1265; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 1266; GFX9-NEXT: v_mov_b32_e32 v1, 0xffffffe8 1267; GFX9-NEXT: s_mov_b32 s3, 0xffffff 1268; GFX9-NEXT: s_and_b32 s2, s2, s3 1269; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1270; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 1271; GFX9-NEXT: s_and_b32 s1, s1, s3 1272; GFX9-NEXT: s_lshl_b32 s0, s0, 1 1273; GFX9-NEXT: v_mul_lo_u32 v1, v1, v0 1274; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 1275; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 1276; GFX9-NEXT: 
v_mul_hi_u32 v0, s2, v0 1277; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24 1278; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0 1279; GFX9-NEXT: v_subrev_u32_e32 v1, 24, v0 1280; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1281; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1282; GFX9-NEXT: v_subrev_u32_e32 v1, 24, v0 1283; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1284; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1285; GFX9-NEXT: v_sub_u32_e32 v1, 23, v0 1286; GFX9-NEXT: v_and_b32_e32 v0, s3, v0 1287; GFX9-NEXT: v_and_b32_e32 v1, s3, v1 1288; GFX9-NEXT: v_lshrrev_b32_e64 v0, v0, s1 1289; GFX9-NEXT: v_lshl_or_b32 v0, s0, v1, v0 1290; GFX9-NEXT: v_readfirstlane_b32 s0, v0 1291; GFX9-NEXT: ; return to shader part epilog 1292; 1293; GFX10-LABEL: s_fshr_i24: 1294; GFX10: ; %bb.0: 1295; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 1296; GFX10-NEXT: s_mov_b32 s3, 0xffffff 1297; GFX10-NEXT: s_lshl_b32 s0, s0, 1 1298; GFX10-NEXT: s_and_b32 s2, s2, s3 1299; GFX10-NEXT: s_and_b32 s1, s1, s3 1300; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 1301; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1302; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 1303; GFX10-NEXT: v_mul_lo_u32 v1, 0xffffffe8, v0 1304; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1 1305; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1 1306; GFX10-NEXT: v_mul_hi_u32 v0, s2, v0 1307; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24 1308; GFX10-NEXT: v_sub_nc_u32_e32 v0, s2, v0 1309; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 24, v0 1310; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 1311; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 1312; GFX10-NEXT: v_subrev_nc_u32_e32 v1, 24, v0 1313; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 1314; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 1315; GFX10-NEXT: v_sub_nc_u32_e32 v1, 23, v0 1316; GFX10-NEXT: v_and_b32_e32 v0, s3, v0 1317; GFX10-NEXT: v_and_b32_e32 v1, s3, v1 1318; GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, s1 1319; GFX10-NEXT: v_lshl_or_b32 v0, s0, v1, v0 1320; GFX10-NEXT: v_readfirstlane_b32 s0, v0 1321; GFX10-NEXT: ; return to shader part epilog 
; IR under test for s_fshr_i24 follows - a single call to @llvm.fshr.i24 on
; the three i24 inreg arguments of this amdgpu_ps function, returned as i24.
; In the expected code above, the shift amount (s2) is masked with 0xffffff
; and reduced to a remainder of 24: a quotient is estimated through a float
; reciprocal of 24 (v_cvt_f32_ubyte0 / v_rcp_iflag / multiply by 0x4f7ffffe),
; multiplied back by 24, subtracted, and corrected by two conditional
; subtract-24 steps. The rhs operand (s1, masked to 24 bits) is shifted right
; by that remainder, the lhs operand (s0, already shifted left by 1) is
; shifted left by 23 minus the remainder, the two are OR-ed, and the result is
; moved to s0 with v_readfirstlane_b32.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
1322 %result = call i24 @llvm.fshr.i24(i24 %lhs, i24 %rhs, i24 %amt) 1323 ret i24 %result 1324} 1325 
; v_fshr_i24 - makes the same @llvm.fshr.i24 call as s_fshr_i24, but the
; function is declared without amdgpu_ps and without inreg arguments.
; Its expected code stays entirely in v-registers: the amount (v2) is masked
; with 0xffffff and reduced to a remainder of 24 with the same
; reciprocal-multiply sequence and two conditional subtract-24 corrections,
; rhs (v1, masked to 24 bits) is shifted right by the remainder, lhs (v0,
; pre-shifted left by 1) is shifted left by 23 minus the remainder, and the
; OR of both is left in v0. Every variant begins with an s_waitcnt and returns
; through s_setpc_b64 s[30:31]; there is no v_readfirstlane_b32 copy.
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script rather than editing them by hand.
1326define i24 @v_fshr_i24(i24 %lhs, i24 %rhs, i24 %amt) { 1327; GFX6-LABEL: v_fshr_i24: 1328; GFX6: ; %bb.0: 1329; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1330; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v3, 24 1331; GFX6-NEXT: v_rcp_iflag_f32_e32 v3, v3 1332; GFX6-NEXT: v_mov_b32_e32 v4, 0xffffffe8 1333; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1334; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1335; GFX6-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 1336; GFX6-NEXT: v_cvt_u32_f32_e32 v3, v3 1337; GFX6-NEXT: v_mul_lo_u32 v4, v4, v3 1338; GFX6-NEXT: v_mul_hi_u32 v4, v3, v4 1339; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v4 1340; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3 1341; GFX6-NEXT: v_mov_b32_e32 v4, 0xffffff 1342; GFX6-NEXT: v_and_b32_e32 v1, v1, v4 1343; GFX6-NEXT: v_mul_lo_u32 v3, v3, 24 1344; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3 1345; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v2 1346; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 1347; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1348; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v2 1349; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 1350; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1351; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v2 1352; GFX6-NEXT: v_and_b32_e32 v2, v2, v4 1353; GFX6-NEXT: v_and_b32_e32 v3, v3, v4 1354; GFX6-NEXT: v_lshlrev_b32_e32 v0, v3, v0 1355; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1 1356; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 1357; GFX6-NEXT: s_setpc_b64 s[30:31] 1358; 1359; GFX8-LABEL: v_fshr_i24: 1360; GFX8: ; %bb.0: 1361; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1362; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v3, 24 1363; GFX8-NEXT: v_rcp_iflag_f32_e32 v3, v3 1364; GFX8-NEXT: v_mov_b32_e32 v4, 0xffffffe8 1365; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1366; GFX8-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1367; GFX8-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 1368; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v3 1369; GFX8-NEXT: v_mul_lo_u32 v4, v4, 
v3 1370; GFX8-NEXT: v_mul_hi_u32 v4, v3, v4 1371; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v4 1372; GFX8-NEXT: v_mul_hi_u32 v3, v2, v3 1373; GFX8-NEXT: v_mov_b32_e32 v4, 0xffffff 1374; GFX8-NEXT: v_and_b32_e32 v1, v1, v4 1375; GFX8-NEXT: v_mul_lo_u32 v3, v3, 24 1376; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v3 1377; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v2 1378; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 1379; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1380; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v2 1381; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 1382; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1383; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v2 1384; GFX8-NEXT: v_and_b32_e32 v2, v2, v4 1385; GFX8-NEXT: v_and_b32_e32 v3, v3, v4 1386; GFX8-NEXT: v_lshlrev_b32_e32 v0, v3, v0 1387; GFX8-NEXT: v_lshrrev_b32_e32 v1, v2, v1 1388; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 1389; GFX8-NEXT: s_setpc_b64 s[30:31] 1390; 1391; GFX9-LABEL: v_fshr_i24: 1392; GFX9: ; %bb.0: 1393; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1394; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v3, 24 1395; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v3 1396; GFX9-NEXT: v_mov_b32_e32 v4, 0xffffffe8 1397; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1398; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1399; GFX9-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 1400; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v3 1401; GFX9-NEXT: v_mul_lo_u32 v4, v4, v3 1402; GFX9-NEXT: v_mul_hi_u32 v4, v3, v4 1403; GFX9-NEXT: v_add_u32_e32 v3, v3, v4 1404; GFX9-NEXT: v_mul_hi_u32 v3, v2, v3 1405; GFX9-NEXT: v_mov_b32_e32 v4, 0xffffff 1406; GFX9-NEXT: v_and_b32_e32 v1, v1, v4 1407; GFX9-NEXT: v_mul_lo_u32 v3, v3, 24 1408; GFX9-NEXT: v_sub_u32_e32 v2, v2, v3 1409; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v2 1410; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 1411; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1412; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v2 1413; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 1414; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1415; GFX9-NEXT: v_sub_u32_e32 v3, 23, v2 
1416; GFX9-NEXT: v_and_b32_e32 v2, v2, v4 1417; GFX9-NEXT: v_and_b32_e32 v3, v3, v4 1418; GFX9-NEXT: v_lshrrev_b32_e32 v1, v2, v1 1419; GFX9-NEXT: v_lshl_or_b32 v0, v0, v3, v1 1420; GFX9-NEXT: s_setpc_b64 s[30:31] 1421; 1422; GFX10-LABEL: v_fshr_i24: 1423; GFX10: ; %bb.0: 1424; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1425; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1426; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v3, 24 1427; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1428; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1429; GFX10-NEXT: v_rcp_iflag_f32_e32 v3, v3 1430; GFX10-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 1431; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v3 1432; GFX10-NEXT: v_mul_lo_u32 v4, 0xffffffe8, v3 1433; GFX10-NEXT: v_mul_hi_u32 v4, v3, v4 1434; GFX10-NEXT: v_add_nc_u32_e32 v3, v3, v4 1435; GFX10-NEXT: v_mov_b32_e32 v4, 0xffffff 1436; GFX10-NEXT: v_mul_hi_u32 v3, v2, v3 1437; GFX10-NEXT: v_and_b32_e32 v1, v1, v4 1438; GFX10-NEXT: v_mul_lo_u32 v3, v3, 24 1439; GFX10-NEXT: v_sub_nc_u32_e32 v2, v2, v3 1440; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v2 1441; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2 1442; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo 1443; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v2 1444; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2 1445; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo 1446; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v2 1447; GFX10-NEXT: v_and_b32_e32 v2, v2, v4 1448; GFX10-NEXT: v_and_b32_e32 v3, v3, v4 1449; GFX10-NEXT: v_lshrrev_b32_e32 v1, v2, v1 1450; GFX10-NEXT: v_lshl_or_b32 v0, v0, v3, v1 1451; GFX10-NEXT: s_setpc_b64 s[30:31] 1452 %result = call i24 @llvm.fshr.i24(i24 %lhs, i24 %rhs, i24 %amt) 1453 ret i24 %result 1454} 1455 1456define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 inreg %amt.arg) { 1457; GFX6-LABEL: s_fshr_v2i24: 1458; GFX6: ; %bb.0: 1459; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 1460; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 1461; GFX6-NEXT: s_movk_i32 s9, 0xff 1462; GFX6-NEXT: s_mov_b32 s11, 
0x80008 1463; GFX6-NEXT: s_lshr_b32 s6, s0, 16 1464; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1465; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 1466; GFX6-NEXT: s_lshr_b32 s7, s0, 24 1467; GFX6-NEXT: s_lshr_b32 s8, s1, 8 1468; GFX6-NEXT: s_and_b32 s10, s0, s9 1469; GFX6-NEXT: s_bfe_u32 s0, s0, s11 1470; GFX6-NEXT: s_and_b32 s1, s1, s9 1471; GFX6-NEXT: s_lshl_b32 s0, s0, 8 1472; GFX6-NEXT: s_lshl_b32 s1, s1, 8 1473; GFX6-NEXT: v_mov_b32_e32 v1, 0xffffffe8 1474; GFX6-NEXT: s_or_b32 s0, s10, s0 1475; GFX6-NEXT: s_or_b32 s1, s7, s1 1476; GFX6-NEXT: s_and_b32 s7, s8, s9 1477; GFX6-NEXT: s_lshr_b32 s8, s2, 16 1478; GFX6-NEXT: s_lshr_b32 s10, s2, 24 1479; GFX6-NEXT: s_and_b32 s13, s2, s9 1480; GFX6-NEXT: s_bfe_u32 s2, s2, s11 1481; GFX6-NEXT: v_mul_lo_u32 v2, v1, v0 1482; GFX6-NEXT: s_lshl_b32 s2, s2, 8 1483; GFX6-NEXT: s_and_b32 s8, s8, s9 1484; GFX6-NEXT: s_or_b32 s2, s13, s2 1485; GFX6-NEXT: s_bfe_u32 s8, s8, 0x100000 1486; GFX6-NEXT: s_lshr_b32 s12, s3, 8 1487; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000 1488; GFX6-NEXT: s_lshl_b32 s8, s8, 16 1489; GFX6-NEXT: s_and_b32 s3, s3, s9 1490; GFX6-NEXT: s_or_b32 s2, s2, s8 1491; GFX6-NEXT: s_lshl_b32 s3, s3, 8 1492; GFX6-NEXT: s_and_b32 s8, s12, s9 1493; GFX6-NEXT: v_mul_hi_u32 v2, v0, v2 1494; GFX6-NEXT: s_or_b32 s3, s10, s3 1495; GFX6-NEXT: s_bfe_u32 s8, s8, 0x100000 1496; GFX6-NEXT: s_bfe_u32 s3, s3, 0x100000 1497; GFX6-NEXT: s_lshl_b32 s8, s8, 16 1498; GFX6-NEXT: s_or_b32 s3, s3, s8 1499; GFX6-NEXT: s_lshr_b32 s8, s4, 16 1500; GFX6-NEXT: s_lshr_b32 s10, s4, 24 1501; GFX6-NEXT: s_and_b32 s13, s4, s9 1502; GFX6-NEXT: s_bfe_u32 s4, s4, s11 1503; GFX6-NEXT: s_lshl_b32 s4, s4, 8 1504; GFX6-NEXT: s_and_b32 s8, s8, s9 1505; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 1506; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v2, 24 1507; GFX6-NEXT: s_or_b32 s4, s13, s4 1508; GFX6-NEXT: s_bfe_u32 s8, s8, 0x100000 1509; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2 1510; GFX6-NEXT: s_bfe_u32 s4, s4, 0x100000 1511; GFX6-NEXT: s_lshl_b32 s8, s8, 16 1512; GFX6-NEXT: s_or_b32 
s4, s4, s8 1513; GFX6-NEXT: v_mul_hi_u32 v0, s4, v0 1514; GFX6-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 1515; GFX6-NEXT: v_cvt_u32_f32_e32 v2, v2 1516; GFX6-NEXT: s_lshr_b32 s12, s5, 8 1517; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24 1518; GFX6-NEXT: s_and_b32 s5, s5, s9 1519; GFX6-NEXT: v_mul_lo_u32 v1, v1, v2 1520; GFX6-NEXT: s_lshl_b32 s5, s5, 8 1521; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s4, v0 1522; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v0 1523; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1524; GFX6-NEXT: v_mul_hi_u32 v1, v2, v1 1525; GFX6-NEXT: s_and_b32 s8, s12, s9 1526; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 1527; GFX6-NEXT: s_or_b32 s5, s10, s5 1528; GFX6-NEXT: s_bfe_u32 s8, s8, 0x100000 1529; GFX6-NEXT: v_subrev_i32_e32 v3, vcc, 24, v0 1530; GFX6-NEXT: s_bfe_u32 s5, s5, 0x100000 1531; GFX6-NEXT: s_lshl_b32 s8, s8, 16 1532; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1533; GFX6-NEXT: s_or_b32 s5, s5, s8 1534; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 1535; GFX6-NEXT: v_add_i32_e32 v1, vcc, v2, v1 1536; GFX6-NEXT: v_mul_hi_u32 v1, s5, v1 1537; GFX6-NEXT: s_and_b32 s6, s6, s9 1538; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000 1539; GFX6-NEXT: s_bfe_u32 s6, s6, 0x100000 1540; GFX6-NEXT: v_mul_lo_u32 v1, v1, 24 1541; GFX6-NEXT: s_mov_b32 s8, 0xffffff 1542; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v0 1543; GFX6-NEXT: s_lshl_b32 s4, s6, 17 1544; GFX6-NEXT: s_lshl_b32 s0, s0, 1 1545; GFX6-NEXT: s_or_b32 s0, s4, s0 1546; GFX6-NEXT: v_and_b32_e32 v2, s8, v3 1547; GFX6-NEXT: v_and_b32_e32 v0, s8, v0 1548; GFX6-NEXT: v_lshl_b32_e32 v2, s0, v2 1549; GFX6-NEXT: v_lshr_b32_e32 v0, s2, v0 1550; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s5, v1 1551; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 1552; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1 1553; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 1554; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 1555; GFX6-NEXT: v_subrev_i32_e32 v2, vcc, 24, v1 1556; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 1557; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000 1558; GFX6-NEXT: s_bfe_u32 s7, s7, 
0x100000 1559; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 1560; GFX6-NEXT: v_mov_b32_e32 v4, 0xffffff 1561; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 23, v1 1562; GFX6-NEXT: s_lshl_b32 s0, s7, 17 1563; GFX6-NEXT: s_lshl_b32 s1, s1, 1 1564; GFX6-NEXT: s_or_b32 s0, s0, s1 1565; GFX6-NEXT: v_and_b32_e32 v2, v2, v4 1566; GFX6-NEXT: v_and_b32_e32 v1, v1, v4 1567; GFX6-NEXT: v_lshl_b32_e32 v2, s0, v2 1568; GFX6-NEXT: v_lshr_b32_e32 v1, s3, v1 1569; GFX6-NEXT: v_bfe_u32 v3, v0, 8, 8 1570; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 1571; GFX6-NEXT: v_and_b32_e32 v2, s9, v0 1572; GFX6-NEXT: v_lshlrev_b32_e32 v3, 8, v3 1573; GFX6-NEXT: v_bfe_u32 v0, v0, 16, 8 1574; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 1575; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 1576; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 1577; GFX6-NEXT: v_and_b32_e32 v2, s9, v1 1578; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v2 1579; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 1580; GFX6-NEXT: v_bfe_u32 v2, v1, 8, 8 1581; GFX6-NEXT: v_bfe_u32 v1, v1, 16, 8 1582; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1 1583; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 1584; GFX6-NEXT: v_readfirstlane_b32 s0, v0 1585; GFX6-NEXT: v_readfirstlane_b32 s1, v1 1586; GFX6-NEXT: ; return to shader part epilog 1587; 1588; GFX8-LABEL: s_fshr_v2i24: 1589; GFX8: ; %bb.0: 1590; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 1591; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 1592; GFX8-NEXT: s_movk_i32 s10, 0xff 1593; GFX8-NEXT: s_lshr_b32 s9, s1, 8 1594; GFX8-NEXT: s_bfe_u32 s11, 8, 0x100000 1595; GFX8-NEXT: s_and_b32 s1, s1, s10 1596; GFX8-NEXT: s_lshr_b32 s6, s0, 8 1597; GFX8-NEXT: s_lshr_b32 s8, s0, 24 1598; GFX8-NEXT: s_lshl_b32 s1, s1, s11 1599; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1600; GFX8-NEXT: s_and_b32 s6, s6, s10 1601; GFX8-NEXT: s_or_b32 s1, s8, s1 1602; GFX8-NEXT: s_lshr_b32 s8, s2, 8 1603; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 1604; GFX8-NEXT: s_lshr_b32 s7, s0, 16 1605; GFX8-NEXT: s_and_b32 s0, s0, s10 1606; GFX8-NEXT: s_lshl_b32 s6, s6, s11 1607; GFX8-NEXT: s_and_b32 s8, s8, s10 1608; 
GFX8-NEXT: s_or_b32 s0, s0, s6 1609; GFX8-NEXT: s_and_b32 s6, s7, s10 1610; GFX8-NEXT: s_and_b32 s7, s9, s10 1611; GFX8-NEXT: s_lshr_b32 s9, s2, 16 1612; GFX8-NEXT: s_lshr_b32 s12, s2, 24 1613; GFX8-NEXT: s_and_b32 s2, s2, s10 1614; GFX8-NEXT: s_lshl_b32 s8, s8, s11 1615; GFX8-NEXT: s_or_b32 s2, s2, s8 1616; GFX8-NEXT: s_and_b32 s8, s9, s10 1617; GFX8-NEXT: v_mov_b32_e32 v1, 0xffffffe8 1618; GFX8-NEXT: s_bfe_u32 s8, s8, 0x100000 1619; GFX8-NEXT: v_mul_lo_u32 v2, v1, v0 1620; GFX8-NEXT: s_lshr_b32 s13, s3, 8 1621; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 1622; GFX8-NEXT: s_lshl_b32 s8, s8, 16 1623; GFX8-NEXT: s_and_b32 s3, s3, s10 1624; GFX8-NEXT: s_or_b32 s2, s2, s8 1625; GFX8-NEXT: s_lshl_b32 s3, s3, s11 1626; GFX8-NEXT: s_and_b32 s8, s13, s10 1627; GFX8-NEXT: s_or_b32 s3, s12, s3 1628; GFX8-NEXT: s_bfe_u32 s8, s8, 0x100000 1629; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000 1630; GFX8-NEXT: s_lshl_b32 s8, s8, 16 1631; GFX8-NEXT: v_mul_hi_u32 v2, v0, v2 1632; GFX8-NEXT: s_or_b32 s3, s3, s8 1633; GFX8-NEXT: s_lshr_b32 s8, s4, 8 1634; GFX8-NEXT: s_and_b32 s8, s8, s10 1635; GFX8-NEXT: s_lshr_b32 s9, s4, 16 1636; GFX8-NEXT: s_lshr_b32 s12, s4, 24 1637; GFX8-NEXT: s_and_b32 s4, s4, s10 1638; GFX8-NEXT: s_lshl_b32 s8, s8, s11 1639; GFX8-NEXT: s_or_b32 s4, s4, s8 1640; GFX8-NEXT: s_and_b32 s8, s9, s10 1641; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 1642; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v2, 24 1643; GFX8-NEXT: s_bfe_u32 s8, s8, 0x100000 1644; GFX8-NEXT: v_rcp_iflag_f32_e32 v2, v2 1645; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000 1646; GFX8-NEXT: s_lshl_b32 s8, s8, 16 1647; GFX8-NEXT: s_or_b32 s4, s4, s8 1648; GFX8-NEXT: v_mul_hi_u32 v0, s4, v0 1649; GFX8-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 1650; GFX8-NEXT: v_cvt_u32_f32_e32 v2, v2 1651; GFX8-NEXT: s_lshr_b32 s13, s5, 8 1652; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24 1653; GFX8-NEXT: s_and_b32 s5, s5, s10 1654; GFX8-NEXT: v_mul_lo_u32 v1, v1, v2 1655; GFX8-NEXT: s_lshl_b32 s5, s5, s11 1656; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s4, v0 1657; 
GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v0 1658; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1659; GFX8-NEXT: v_mul_hi_u32 v1, v2, v1 1660; GFX8-NEXT: s_and_b32 s8, s13, s10 1661; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 1662; GFX8-NEXT: s_or_b32 s5, s12, s5 1663; GFX8-NEXT: s_bfe_u32 s8, s8, 0x100000 1664; GFX8-NEXT: v_subrev_u32_e32 v3, vcc, 24, v0 1665; GFX8-NEXT: s_bfe_u32 s5, s5, 0x100000 1666; GFX8-NEXT: s_lshl_b32 s8, s8, 16 1667; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1668; GFX8-NEXT: s_or_b32 s5, s5, s8 1669; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 1670; GFX8-NEXT: v_add_u32_e32 v1, vcc, v2, v1 1671; GFX8-NEXT: v_mul_hi_u32 v1, s5, v1 1672; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000 1673; GFX8-NEXT: s_bfe_u32 s6, s6, 0x100000 1674; GFX8-NEXT: s_mov_b32 s8, 0xffffff 1675; GFX8-NEXT: v_mul_lo_u32 v1, v1, 24 1676; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v0 1677; GFX8-NEXT: s_lshl_b32 s4, s6, 17 1678; GFX8-NEXT: s_lshl_b32 s0, s0, 1 1679; GFX8-NEXT: s_or_b32 s0, s4, s0 1680; GFX8-NEXT: v_and_b32_e32 v2, s8, v3 1681; GFX8-NEXT: v_and_b32_e32 v0, s8, v0 1682; GFX8-NEXT: v_lshlrev_b32_e64 v2, v2, s0 1683; GFX8-NEXT: v_lshrrev_b32_e64 v0, v0, s2 1684; GFX8-NEXT: v_sub_u32_e32 v1, vcc, s5, v1 1685; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 1686; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v1 1687; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 1688; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 1689; GFX8-NEXT: v_subrev_u32_e32 v2, vcc, 24, v1 1690; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 1691; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 1692; GFX8-NEXT: s_bfe_u32 s7, s7, 0x100000 1693; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 1694; GFX8-NEXT: v_mov_b32_e32 v4, 0xffffff 1695; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 23, v1 1696; GFX8-NEXT: s_lshl_b32 s0, s7, 17 1697; GFX8-NEXT: s_lshl_b32 s1, s1, 1 1698; GFX8-NEXT: s_or_b32 s0, s0, s1 1699; GFX8-NEXT: v_and_b32_e32 v2, v2, v4 1700; GFX8-NEXT: v_and_b32_e32 v1, v1, v4 1701; GFX8-NEXT: v_lshlrev_b32_e64 v2, v2, s0 1702; GFX8-NEXT: 
v_lshrrev_b32_e64 v1, v1, s3 1703; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 1704; GFX8-NEXT: v_mov_b32_e32 v2, 8 1705; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 1706; GFX8-NEXT: v_mov_b32_e32 v4, 16 1707; GFX8-NEXT: v_or_b32_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1708; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 1709; GFX8-NEXT: v_or_b32_e32 v0, v3, v0 1710; GFX8-NEXT: v_and_b32_e32 v3, s10, v1 1711; GFX8-NEXT: v_lshlrev_b32_e32 v3, 24, v3 1712; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 1713; GFX8-NEXT: v_or_b32_e32 v0, v0, v3 1714; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD 1715; GFX8-NEXT: v_readfirstlane_b32 s0, v0 1716; GFX8-NEXT: v_readfirstlane_b32 s1, v1 1717; GFX8-NEXT: ; return to shader part epilog 1718; 1719; GFX9-LABEL: s_fshr_v2i24: 1720; GFX9: ; %bb.0: 1721; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 1722; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 1723; GFX9-NEXT: v_mov_b32_e32 v1, 0xffffffe8 1724; GFX9-NEXT: s_movk_i32 s12, 0xff 1725; GFX9-NEXT: s_lshr_b32 s11, s1, 8 1726; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1727; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 1728; GFX9-NEXT: s_bfe_u32 s13, 8, 0x100000 1729; GFX9-NEXT: s_and_b32 s1, s1, s12 1730; GFX9-NEXT: s_lshr_b32 s7, s0, 8 1731; GFX9-NEXT: v_mul_lo_u32 v2, v1, v0 1732; GFX9-NEXT: s_lshr_b32 s10, s0, 24 1733; GFX9-NEXT: s_lshl_b32 s1, s1, s13 1734; GFX9-NEXT: s_and_b32 s7, s7, s12 1735; GFX9-NEXT: v_mul_hi_u32 v2, v0, v2 1736; GFX9-NEXT: s_or_b32 s1, s10, s1 1737; GFX9-NEXT: s_lshr_b32 s10, s2, 8 1738; GFX9-NEXT: s_lshr_b32 s9, s0, 16 1739; GFX9-NEXT: s_and_b32 s0, s0, s12 1740; GFX9-NEXT: s_lshl_b32 s7, s7, s13 1741; GFX9-NEXT: s_and_b32 s10, s10, s12 1742; GFX9-NEXT: s_or_b32 s0, s0, s7 1743; GFX9-NEXT: s_and_b32 
s7, s9, s12 1744; GFX9-NEXT: s_and_b32 s9, s11, s12 1745; GFX9-NEXT: s_lshr_b32 s11, s2, 16 1746; GFX9-NEXT: s_lshr_b32 s14, s2, 24 1747; GFX9-NEXT: s_and_b32 s2, s2, s12 1748; GFX9-NEXT: s_lshl_b32 s10, s10, s13 1749; GFX9-NEXT: s_or_b32 s2, s2, s10 1750; GFX9-NEXT: s_and_b32 s10, s11, s12 1751; GFX9-NEXT: v_add_u32_e32 v0, v0, v2 1752; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v2, 24 1753; GFX9-NEXT: s_bfe_u32 s10, s10, 0x100000 1754; GFX9-NEXT: v_rcp_iflag_f32_e32 v2, v2 1755; GFX9-NEXT: s_lshr_b32 s15, s3, 8 1756; GFX9-NEXT: s_bfe_u32 s2, s2, 0x100000 1757; GFX9-NEXT: s_lshl_b32 s10, s10, 16 1758; GFX9-NEXT: s_and_b32 s3, s3, s12 1759; GFX9-NEXT: s_or_b32 s2, s2, s10 1760; GFX9-NEXT: s_lshl_b32 s3, s3, s13 1761; GFX9-NEXT: s_and_b32 s10, s15, s12 1762; GFX9-NEXT: s_or_b32 s3, s14, s3 1763; GFX9-NEXT: s_bfe_u32 s10, s10, 0x100000 1764; GFX9-NEXT: s_bfe_u32 s3, s3, 0x100000 1765; GFX9-NEXT: s_lshl_b32 s10, s10, 16 1766; GFX9-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 1767; GFX9-NEXT: s_or_b32 s3, s3, s10 1768; GFX9-NEXT: s_lshr_b32 s10, s4, 8 1769; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v2 1770; GFX9-NEXT: s_and_b32 s10, s10, s12 1771; GFX9-NEXT: s_lshr_b32 s11, s4, 16 1772; GFX9-NEXT: s_lshr_b32 s14, s4, 24 1773; GFX9-NEXT: s_and_b32 s4, s4, s12 1774; GFX9-NEXT: s_lshl_b32 s10, s10, s13 1775; GFX9-NEXT: s_or_b32 s4, s4, s10 1776; GFX9-NEXT: s_and_b32 s10, s11, s12 1777; GFX9-NEXT: s_bfe_u32 s10, s10, 0x100000 1778; GFX9-NEXT: v_mul_lo_u32 v1, v1, v2 1779; GFX9-NEXT: s_bfe_u32 s4, s4, 0x100000 1780; GFX9-NEXT: s_lshl_b32 s10, s10, 16 1781; GFX9-NEXT: s_or_b32 s4, s4, s10 1782; GFX9-NEXT: v_mul_hi_u32 v0, s4, v0 1783; GFX9-NEXT: s_lshr_b32 s15, s5, 8 1784; GFX9-NEXT: s_and_b32 s5, s5, s12 1785; GFX9-NEXT: v_mul_hi_u32 v1, v2, v1 1786; GFX9-NEXT: s_lshl_b32 s5, s5, s13 1787; GFX9-NEXT: s_and_b32 s10, s15, s12 1788; GFX9-NEXT: s_or_b32 s5, s14, s5 1789; GFX9-NEXT: s_bfe_u32 s10, s10, 0x100000 1790; GFX9-NEXT: s_bfe_u32 s5, s5, 0x100000 1791; GFX9-NEXT: s_lshl_b32 s10, s10, 16 1792; 
GFX9-NEXT: v_mul_lo_u32 v0, v0, 24 1793; GFX9-NEXT: s_or_b32 s5, s5, s10 1794; GFX9-NEXT: v_add_u32_e32 v1, v2, v1 1795; GFX9-NEXT: v_mul_hi_u32 v1, s5, v1 1796; GFX9-NEXT: v_sub_u32_e32 v0, s4, v0 1797; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v0 1798; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1799; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 1800; GFX9-NEXT: v_mul_lo_u32 v1, v1, 24 1801; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v0 1802; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1803; GFX9-NEXT: s_bfe_u32 s0, s0, 0x100000 1804; GFX9-NEXT: s_bfe_u32 s7, s7, 0x100000 1805; GFX9-NEXT: s_mov_b32 s10, 0xffffff 1806; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 1807; GFX9-NEXT: v_sub_u32_e32 v3, 23, v0 1808; GFX9-NEXT: s_lshl_b32 s4, s7, 17 1809; GFX9-NEXT: s_lshl_b32 s0, s0, 1 1810; GFX9-NEXT: v_and_b32_e32 v0, s10, v0 1811; GFX9-NEXT: s_or_b32 s0, s4, s0 1812; GFX9-NEXT: v_and_b32_e32 v3, s10, v3 1813; GFX9-NEXT: v_lshrrev_b32_e64 v0, v0, s2 1814; GFX9-NEXT: v_sub_u32_e32 v1, s5, v1 1815; GFX9-NEXT: v_lshl_or_b32 v0, s0, v3, v0 1816; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v1 1817; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 1818; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 1819; GFX9-NEXT: v_subrev_u32_e32 v3, 24, v1 1820; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 1821; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 1822; GFX9-NEXT: s_bfe_u32 s9, s9, 0x100000 1823; GFX9-NEXT: v_mov_b32_e32 v2, 0xffffff 1824; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 1825; GFX9-NEXT: v_sub_u32_e32 v3, 23, v1 1826; GFX9-NEXT: s_lshl_b32 s0, s9, 17 1827; GFX9-NEXT: s_lshl_b32 s1, s1, 1 1828; GFX9-NEXT: v_and_b32_e32 v1, v1, v2 1829; GFX9-NEXT: s_or_b32 s0, s0, s1 1830; GFX9-NEXT: v_and_b32_e32 v3, v3, v2 1831; GFX9-NEXT: v_lshrrev_b32_e64 v1, v1, s3 1832; GFX9-NEXT: s_mov_b32 s6, 8 1833; GFX9-NEXT: v_lshl_or_b32 v1, s0, v3, v1 1834; GFX9-NEXT: s_mov_b32 s8, 16 1835; GFX9-NEXT: v_lshlrev_b32_sdwa v2, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 1836; GFX9-NEXT: v_and_b32_e32 v3, s12, v1 
1837; GFX9-NEXT: v_and_or_b32 v2, v0, s12, v2 1838; GFX9-NEXT: v_lshlrev_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 1839; GFX9-NEXT: v_lshlrev_b32_e32 v3, 24, v3 1840; GFX9-NEXT: v_or3_b32 v0, v2, v0, v3 1841; GFX9-NEXT: v_bfe_u32 v2, v1, 8, 8 1842; GFX9-NEXT: v_bfe_u32 v1, v1, 16, 8 1843; GFX9-NEXT: v_lshl_or_b32 v1, v1, 8, v2 1844; GFX9-NEXT: v_readfirstlane_b32 s0, v0 1845; GFX9-NEXT: v_readfirstlane_b32 s1, v1 1846; GFX9-NEXT: ; return to shader part epilog 1847; 1848; GFX10-LABEL: s_fshr_v2i24: 1849; GFX10: ; %bb.0: 1850; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 1851; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v1, 24 1852; GFX10-NEXT: s_movk_i32 s9, 0xff 1853; GFX10-NEXT: s_lshr_b32 s12, s4, 8 1854; GFX10-NEXT: s_bfe_u32 s10, 8, 0x100000 1855; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 1856; GFX10-NEXT: v_rcp_iflag_f32_e32 v1, v1 1857; GFX10-NEXT: s_lshr_b32 s13, s4, 16 1858; GFX10-NEXT: s_and_b32 s12, s12, s9 1859; GFX10-NEXT: s_lshr_b32 s14, s4, 24 1860; GFX10-NEXT: s_and_b32 s4, s4, s9 1861; GFX10-NEXT: s_lshl_b32 s12, s12, s10 1862; GFX10-NEXT: s_and_b32 s13, s13, s9 1863; GFX10-NEXT: s_or_b32 s4, s4, s12 1864; GFX10-NEXT: s_bfe_u32 s12, s13, 0x100000 1865; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1866; GFX10-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 1867; GFX10-NEXT: s_lshr_b32 s15, s5, 8 1868; GFX10-NEXT: s_bfe_u32 s4, s4, 0x100000 1869; GFX10-NEXT: s_lshl_b32 s12, s12, 16 1870; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 1871; GFX10-NEXT: v_cvt_u32_f32_e32 v1, v1 1872; GFX10-NEXT: s_and_b32 s5, s5, s9 1873; GFX10-NEXT: s_or_b32 s4, s4, s12 1874; GFX10-NEXT: s_lshl_b32 s5, s5, s10 1875; GFX10-NEXT: v_mul_lo_u32 v2, 0xffffffe8, v0 1876; GFX10-NEXT: v_mul_lo_u32 v3, 0xffffffe8, v1 1877; GFX10-NEXT: s_and_b32 s12, s15, s9 1878; GFX10-NEXT: s_or_b32 s5, s14, s5 1879; GFX10-NEXT: s_bfe_u32 s12, s12, 0x100000 1880; GFX10-NEXT: s_bfe_u32 s5, s5, 0x100000 1881; GFX10-NEXT: s_lshl_b32 s12, s12, 16 1882; GFX10-NEXT: s_lshr_b32 s11, s1, 8 
1883; GFX10-NEXT: v_mul_hi_u32 v2, v0, v2 1884; GFX10-NEXT: s_or_b32 s5, s5, s12 1885; GFX10-NEXT: s_and_b32 s1, s1, s9 1886; GFX10-NEXT: s_lshr_b32 s6, s0, 8 1887; GFX10-NEXT: s_lshr_b32 s8, s0, 24 1888; GFX10-NEXT: s_lshl_b32 s1, s1, s10 1889; GFX10-NEXT: s_and_b32 s6, s6, s9 1890; GFX10-NEXT: s_or_b32 s1, s8, s1 1891; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v2 1892; GFX10-NEXT: v_mul_hi_u32 v2, v1, v3 1893; GFX10-NEXT: s_lshr_b32 s8, s2, 8 1894; GFX10-NEXT: s_lshr_b32 s7, s0, 16 1895; GFX10-NEXT: s_and_b32 s0, s0, s9 1896; GFX10-NEXT: v_mul_hi_u32 v0, s4, v0 1897; GFX10-NEXT: s_lshl_b32 s6, s6, s10 1898; GFX10-NEXT: s_and_b32 s8, s8, s9 1899; GFX10-NEXT: s_or_b32 s0, s0, s6 1900; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v2 1901; GFX10-NEXT: s_and_b32 s6, s7, s9 1902; GFX10-NEXT: s_and_b32 s7, s11, s9 1903; GFX10-NEXT: s_lshr_b32 s11, s2, 16 1904; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24 1905; GFX10-NEXT: v_mul_hi_u32 v1, s5, v1 1906; GFX10-NEXT: s_lshr_b32 s13, s2, 24 1907; GFX10-NEXT: s_and_b32 s2, s2, s9 1908; GFX10-NEXT: s_lshl_b32 s8, s8, s10 1909; GFX10-NEXT: s_lshr_b32 s12, s3, 8 1910; GFX10-NEXT: s_or_b32 s2, s2, s8 1911; GFX10-NEXT: s_and_b32 s8, s11, s9 1912; GFX10-NEXT: v_sub_nc_u32_e32 v0, s4, v0 1913; GFX10-NEXT: v_mul_lo_u32 v1, v1, 24 1914; GFX10-NEXT: s_bfe_u32 s4, s8, 0x100000 1915; GFX10-NEXT: s_bfe_u32 s2, s2, 0x100000 1916; GFX10-NEXT: s_lshl_b32 s4, s4, 16 1917; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v0 1918; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 1919; GFX10-NEXT: s_and_b32 s3, s3, s9 1920; GFX10-NEXT: s_or_b32 s2, s2, s4 1921; GFX10-NEXT: v_sub_nc_u32_e32 v1, s5, v1 1922; GFX10-NEXT: s_mov_b32 s4, 0xffffff 1923; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 1924; GFX10-NEXT: s_lshl_b32 s3, s3, s10 1925; GFX10-NEXT: s_and_b32 s5, s12, s9 1926; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v1 1927; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 1928; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 24, v0 1929; GFX10-NEXT: s_or_b32 s3, s13, s3 1930; GFX10-NEXT: 
s_bfe_u32 s5, s5, 0x100000 1931; GFX10-NEXT: s_bfe_u32 s3, s3, 0x100000 1932; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo 1933; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 1934; GFX10-NEXT: s_lshl_b32 s5, s5, 16 1935; GFX10-NEXT: s_bfe_u32 s0, s0, 0x100000 1936; GFX10-NEXT: s_bfe_u32 s6, s6, 0x100000 1937; GFX10-NEXT: v_subrev_nc_u32_e32 v2, 24, v1 1938; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo 1939; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 1940; GFX10-NEXT: s_or_b32 s3, s3, s5 1941; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 1942; GFX10-NEXT: s_bfe_u32 s7, s7, 0x100000 1943; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v0 1944; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo 1945; GFX10-NEXT: v_mov_b32_e32 v2, 0xffffff 1946; GFX10-NEXT: v_and_b32_e32 v0, s4, v0 1947; GFX10-NEXT: s_lshl_b32 s5, s6, 17 1948; GFX10-NEXT: v_and_b32_e32 v3, s4, v3 1949; GFX10-NEXT: v_sub_nc_u32_e32 v4, 23, v1 1950; GFX10-NEXT: v_and_b32_e32 v1, v1, v2 1951; GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, s2 1952; GFX10-NEXT: s_lshl_b32 s0, s0, 1 1953; GFX10-NEXT: s_lshl_b32 s2, s7, 17 1954; GFX10-NEXT: v_and_b32_e32 v2, v4, v2 1955; GFX10-NEXT: v_lshrrev_b32_e64 v1, v1, s3 1956; GFX10-NEXT: s_or_b32 s0, s5, s0 1957; GFX10-NEXT: s_lshl_b32 s1, s1, 1 1958; GFX10-NEXT: v_lshl_or_b32 v0, s0, v3, v0 1959; GFX10-NEXT: s_or_b32 s0, s2, s1 1960; GFX10-NEXT: v_lshl_or_b32 v1, s0, v2, v1 1961; GFX10-NEXT: s_mov_b32 s0, 8 1962; GFX10-NEXT: v_lshlrev_b32_sdwa v2, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 1963; GFX10-NEXT: s_mov_b32 s0, 16 1964; GFX10-NEXT: v_and_b32_e32 v3, s9, v1 1965; GFX10-NEXT: v_bfe_u32 v4, v1, 8, 8 1966; GFX10-NEXT: v_bfe_u32 v1, v1, 16, 8 1967; GFX10-NEXT: v_and_or_b32 v2, v0, s9, v2 1968; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 1969; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v3 1970; GFX10-NEXT: v_lshl_or_b32 v1, v1, 8, v4 1971; GFX10-NEXT: v_or3_b32 v0, v2, v0, v3 1972; GFX10-NEXT: 
v_readfirstlane_b32 s1, v1 1973; GFX10-NEXT: v_readfirstlane_b32 s0, v0 1974; GFX10-NEXT: ; return to shader part epilog 1975 %lhs = bitcast i48 %lhs.arg to <2 x i24> 1976 %rhs = bitcast i48 %rhs.arg to <2 x i24> 1977 %amt = bitcast i48 %amt.arg to <2 x i24> 1978 %result = call <2 x i24> @llvm.fshr.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) 1979 %cast.result = bitcast <2 x i24> %result to i48 1980 ret i48 %cast.result 1981} 1982 1983define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) { 1984; GFX6-LABEL: v_fshr_v2i24: 1985; GFX6: ; %bb.0: 1986; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1987; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 1988; GFX6-NEXT: v_rcp_iflag_f32_e32 v6, v6 1989; GFX6-NEXT: v_mov_b32_e32 v7, 0xffffffe8 1990; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4 1991; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v9, 24 1992; GFX6-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 1993; GFX6-NEXT: v_cvt_u32_f32_e32 v6, v6 1994; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1995; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1 1996; GFX6-NEXT: v_mul_lo_u32 v8, v7, v6 1997; GFX6-NEXT: v_mul_hi_u32 v8, v6, v8 1998; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v8 1999; GFX6-NEXT: v_mul_hi_u32 v6, v4, v6 2000; GFX6-NEXT: v_rcp_iflag_f32_e32 v8, v9 2001; GFX6-NEXT: v_mov_b32_e32 v9, 0xffffff 2002; GFX6-NEXT: v_and_b32_e32 v5, v5, v9 2003; GFX6-NEXT: v_mul_lo_u32 v6, v6, 24 2004; GFX6-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8 2005; GFX6-NEXT: v_cvt_u32_f32_e32 v8, v8 2006; GFX6-NEXT: v_and_b32_e32 v2, v2, v9 2007; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v6 2008; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 24, v4 2009; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 2010; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc 2011; GFX6-NEXT: v_subrev_i32_e32 v6, vcc, 24, v4 2012; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 2013; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc 2014; GFX6-NEXT: v_mul_lo_u32 v6, v7, v8 2015; GFX6-NEXT: v_sub_i32_e32 v7, vcc, 23, v4 2016; GFX6-NEXT: v_and_b32_e32 v7, v7, v9 
2017; GFX6-NEXT: v_mul_hi_u32 v6, v8, v6 2018; GFX6-NEXT: v_and_b32_e32 v4, v4, v9 2019; GFX6-NEXT: v_lshlrev_b32_e32 v0, v7, v0 2020; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2 2021; GFX6-NEXT: v_add_i32_e32 v6, vcc, v8, v6 2022; GFX6-NEXT: v_mul_hi_u32 v6, v5, v6 2023; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 2024; GFX6-NEXT: v_and_b32_e32 v3, v3, v9 2025; GFX6-NEXT: v_mul_lo_u32 v6, v6, 24 2026; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v5, v6 2027; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2 2028; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 2029; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2030; GFX6-NEXT: v_subrev_i32_e32 v4, vcc, 24, v2 2031; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 2032; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2033; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 23, v2 2034; GFX6-NEXT: v_and_b32_e32 v4, v4, v9 2035; GFX6-NEXT: v_and_b32_e32 v2, v2, v9 2036; GFX6-NEXT: v_lshlrev_b32_e32 v1, v4, v1 2037; GFX6-NEXT: v_lshrrev_b32_e32 v2, v2, v3 2038; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 2039; GFX6-NEXT: s_setpc_b64 s[30:31] 2040; 2041; GFX8-LABEL: v_fshr_v2i24: 2042; GFX8: ; %bb.0: 2043; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2044; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 2045; GFX8-NEXT: v_rcp_iflag_f32_e32 v6, v6 2046; GFX8-NEXT: v_mov_b32_e32 v7, 0xffffffe8 2047; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2048; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v9, 24 2049; GFX8-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 2050; GFX8-NEXT: v_cvt_u32_f32_e32 v6, v6 2051; GFX8-NEXT: v_lshlrev_b32_e32 v0, 1, v0 2052; GFX8-NEXT: v_lshlrev_b32_e32 v1, 1, v1 2053; GFX8-NEXT: v_mul_lo_u32 v8, v7, v6 2054; GFX8-NEXT: v_mul_hi_u32 v8, v6, v8 2055; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v8 2056; GFX8-NEXT: v_mul_hi_u32 v6, v4, v6 2057; GFX8-NEXT: v_rcp_iflag_f32_e32 v8, v9 2058; GFX8-NEXT: v_mov_b32_e32 v9, 0xffffff 2059; GFX8-NEXT: v_and_b32_e32 v5, v5, v9 2060; GFX8-NEXT: v_mul_lo_u32 v6, v6, 24 2061; GFX8-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8 2062; GFX8-NEXT: v_cvt_u32_f32_e32 v8, v8 2063; 
GFX8-NEXT: v_and_b32_e32 v2, v2, v9 2064; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v6 2065; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 24, v4 2066; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 2067; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc 2068; GFX8-NEXT: v_subrev_u32_e32 v6, vcc, 24, v4 2069; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 2070; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc 2071; GFX8-NEXT: v_mul_lo_u32 v6, v7, v8 2072; GFX8-NEXT: v_sub_u32_e32 v7, vcc, 23, v4 2073; GFX8-NEXT: v_and_b32_e32 v7, v7, v9 2074; GFX8-NEXT: v_mul_hi_u32 v6, v8, v6 2075; GFX8-NEXT: v_and_b32_e32 v4, v4, v9 2076; GFX8-NEXT: v_lshlrev_b32_e32 v0, v7, v0 2077; GFX8-NEXT: v_lshrrev_b32_e32 v2, v4, v2 2078; GFX8-NEXT: v_add_u32_e32 v6, vcc, v8, v6 2079; GFX8-NEXT: v_mul_hi_u32 v6, v5, v6 2080; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 2081; GFX8-NEXT: v_and_b32_e32 v3, v3, v9 2082; GFX8-NEXT: v_mul_lo_u32 v6, v6, 24 2083; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v5, v6 2084; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 24, v2 2085; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 2086; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2087; GFX8-NEXT: v_subrev_u32_e32 v4, vcc, 24, v2 2088; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 2089; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2090; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 23, v2 2091; GFX8-NEXT: v_and_b32_e32 v4, v4, v9 2092; GFX8-NEXT: v_and_b32_e32 v2, v2, v9 2093; GFX8-NEXT: v_lshlrev_b32_e32 v1, v4, v1 2094; GFX8-NEXT: v_lshrrev_b32_e32 v2, v2, v3 2095; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 2096; GFX8-NEXT: s_setpc_b64 s[30:31] 2097; 2098; GFX9-LABEL: v_fshr_v2i24: 2099; GFX9: ; %bb.0: 2100; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2101; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 2102; GFX9-NEXT: v_rcp_iflag_f32_e32 v6, v6 2103; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v9, 24 2104; GFX9-NEXT: v_rcp_iflag_f32_e32 v9, v9 2105; GFX9-NEXT: v_mov_b32_e32 v7, 0xffffffe8 2106; GFX9-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 2107; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v6 2108; GFX9-NEXT: v_mul_f32_e32 
v9, 0x4f7ffffe, v9 2109; GFX9-NEXT: v_cvt_u32_f32_e32 v9, v9 2110; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2111; GFX9-NEXT: v_mul_lo_u32 v8, v7, v6 2112; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0 2113; GFX9-NEXT: v_mul_lo_u32 v7, v7, v9 2114; GFX9-NEXT: v_lshlrev_b32_e32 v1, 1, v1 2115; GFX9-NEXT: v_mul_hi_u32 v8, v6, v8 2116; GFX9-NEXT: v_mul_hi_u32 v7, v9, v7 2117; GFX9-NEXT: v_add_u32_e32 v6, v6, v8 2118; GFX9-NEXT: v_mul_hi_u32 v6, v4, v6 2119; GFX9-NEXT: v_mov_b32_e32 v8, 0xffffff 2120; GFX9-NEXT: v_and_b32_e32 v5, v5, v8 2121; GFX9-NEXT: v_add_u32_e32 v7, v9, v7 2122; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 2123; GFX9-NEXT: v_mul_hi_u32 v7, v5, v7 2124; GFX9-NEXT: v_and_b32_e32 v2, v2, v8 2125; GFX9-NEXT: v_and_b32_e32 v3, v3, v8 2126; GFX9-NEXT: v_sub_u32_e32 v4, v4, v6 2127; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4 2128; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 2129; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc 2130; GFX9-NEXT: v_subrev_u32_e32 v6, 24, v4 2131; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 2132; GFX9-NEXT: v_mul_lo_u32 v7, v7, 24 2133; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc 2134; GFX9-NEXT: v_sub_u32_e32 v6, 23, v4 2135; GFX9-NEXT: v_and_b32_e32 v4, v4, v8 2136; GFX9-NEXT: v_and_b32_e32 v6, v6, v8 2137; GFX9-NEXT: v_lshrrev_b32_e32 v2, v4, v2 2138; GFX9-NEXT: v_lshl_or_b32 v0, v0, v6, v2 2139; GFX9-NEXT: v_sub_u32_e32 v2, v5, v7 2140; GFX9-NEXT: v_subrev_u32_e32 v4, 24, v2 2141; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 2142; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2143; GFX9-NEXT: v_subrev_u32_e32 v4, 24, v2 2144; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 2145; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2146; GFX9-NEXT: v_sub_u32_e32 v4, 23, v2 2147; GFX9-NEXT: v_and_b32_e32 v2, v2, v8 2148; GFX9-NEXT: v_and_b32_e32 v4, v4, v8 2149; GFX9-NEXT: v_lshrrev_b32_e32 v2, v2, v3 2150; GFX9-NEXT: v_lshl_or_b32 v1, v1, v4, v2 2151; GFX9-NEXT: s_setpc_b64 s[30:31] 2152; 2153; GFX10-LABEL: v_fshr_v2i24: 2154; GFX10: ; %bb.0: 2155; GFX10-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 2156; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2157; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 2158; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v7, 24 2159; GFX10-NEXT: v_mov_b32_e32 v10, 0xffffff 2160; GFX10-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2161; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0 2162; GFX10-NEXT: v_rcp_iflag_f32_e32 v6, v6 2163; GFX10-NEXT: v_rcp_iflag_f32_e32 v7, v7 2164; GFX10-NEXT: v_and_b32_e32 v5, v5, v10 2165; GFX10-NEXT: v_and_b32_e32 v2, v2, v10 2166; GFX10-NEXT: v_and_b32_e32 v3, v3, v10 2167; GFX10-NEXT: v_lshlrev_b32_e32 v1, 1, v1 2168; GFX10-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 2169; GFX10-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7 2170; GFX10-NEXT: v_cvt_u32_f32_e32 v6, v6 2171; GFX10-NEXT: v_cvt_u32_f32_e32 v7, v7 2172; GFX10-NEXT: v_mul_lo_u32 v8, 0xffffffe8, v6 2173; GFX10-NEXT: v_mul_lo_u32 v9, 0xffffffe8, v7 2174; GFX10-NEXT: v_mul_hi_u32 v8, v6, v8 2175; GFX10-NEXT: v_mul_hi_u32 v9, v7, v9 2176; GFX10-NEXT: v_add_nc_u32_e32 v6, v6, v8 2177; GFX10-NEXT: v_add_nc_u32_e32 v7, v7, v9 2178; GFX10-NEXT: v_mul_hi_u32 v6, v4, v6 2179; GFX10-NEXT: v_mul_hi_u32 v7, v5, v7 2180; GFX10-NEXT: v_mul_lo_u32 v6, v6, 24 2181; GFX10-NEXT: v_mul_lo_u32 v7, v7, 24 2182; GFX10-NEXT: v_sub_nc_u32_e32 v4, v4, v6 2183; GFX10-NEXT: v_sub_nc_u32_e32 v5, v5, v7 2184; GFX10-NEXT: v_subrev_nc_u32_e32 v6, 24, v4 2185; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4 2186; GFX10-NEXT: v_subrev_nc_u32_e32 v7, 24, v5 2187; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo 2188; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5 2189; GFX10-NEXT: v_subrev_nc_u32_e32 v6, 24, v4 2190; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo 2191; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4 2192; GFX10-NEXT: v_subrev_nc_u32_e32 v7, 24, v5 2193; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo 2194; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5 2195; GFX10-NEXT: v_sub_nc_u32_e32 v6, 23, v4 2196; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo 2197; GFX10-NEXT: v_and_b32_e32 v4, 
v4, v10 2198; GFX10-NEXT: v_and_b32_e32 v6, v6, v10 2199; GFX10-NEXT: v_sub_nc_u32_e32 v7, 23, v5 2200; GFX10-NEXT: v_and_b32_e32 v5, v5, v10 2201; GFX10-NEXT: v_lshrrev_b32_e32 v2, v4, v2 2202; GFX10-NEXT: v_and_b32_e32 v4, v7, v10 2203; GFX10-NEXT: v_lshrrev_b32_e32 v3, v5, v3 2204; GFX10-NEXT: v_lshl_or_b32 v0, v0, v6, v2 2205; GFX10-NEXT: v_lshl_or_b32 v1, v1, v4, v3 2206; GFX10-NEXT: s_setpc_b64 s[30:31] 2207 %result = call <2 x i24> @llvm.fshr.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) 2208 ret <2 x i24> %result 2209} 2210 2211define amdgpu_ps i32 @s_fshr_i32(i32 inreg %lhs, i32 inreg %rhs, i32 inreg %amt) { 2212; GFX6-LABEL: s_fshr_i32: 2213; GFX6: ; %bb.0: 2214; GFX6-NEXT: v_mov_b32_e32 v0, s1 2215; GFX6-NEXT: v_mov_b32_e32 v1, s2 2216; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, v1 2217; GFX6-NEXT: v_readfirstlane_b32 s0, v0 2218; GFX6-NEXT: ; return to shader part epilog 2219; 2220; GFX8-LABEL: s_fshr_i32: 2221; GFX8: ; %bb.0: 2222; GFX8-NEXT: v_mov_b32_e32 v0, s1 2223; GFX8-NEXT: v_mov_b32_e32 v1, s2 2224; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, v1 2225; GFX8-NEXT: v_readfirstlane_b32 s0, v0 2226; GFX8-NEXT: ; return to shader part epilog 2227; 2228; GFX9-LABEL: s_fshr_i32: 2229; GFX9: ; %bb.0: 2230; GFX9-NEXT: v_mov_b32_e32 v0, s1 2231; GFX9-NEXT: v_mov_b32_e32 v1, s2 2232; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, v1 2233; GFX9-NEXT: v_readfirstlane_b32 s0, v0 2234; GFX9-NEXT: ; return to shader part epilog 2235; 2236; GFX10-LABEL: s_fshr_i32: 2237; GFX10: ; %bb.0: 2238; GFX10-NEXT: v_mov_b32_e32 v0, s2 2239; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, v0 2240; GFX10-NEXT: v_readfirstlane_b32 s0, v0 2241; GFX10-NEXT: ; return to shader part epilog 2242 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt) 2243 ret i32 %result 2244} 2245 2246define amdgpu_ps i32 @s_fshr_i32_5(i32 inreg %lhs, i32 inreg %rhs) { 2247; GFX6-LABEL: s_fshr_i32_5: 2248; GFX6: ; %bb.0: 2249; GFX6-NEXT: v_mov_b32_e32 v0, s1 2250; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, 5 2251; 
GFX6-NEXT: v_readfirstlane_b32 s0, v0 2252; GFX6-NEXT: ; return to shader part epilog 2253; 2254; GFX8-LABEL: s_fshr_i32_5: 2255; GFX8: ; %bb.0: 2256; GFX8-NEXT: v_mov_b32_e32 v0, s1 2257; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, 5 2258; GFX8-NEXT: v_readfirstlane_b32 s0, v0 2259; GFX8-NEXT: ; return to shader part epilog 2260; 2261; GFX9-LABEL: s_fshr_i32_5: 2262; GFX9: ; %bb.0: 2263; GFX9-NEXT: v_mov_b32_e32 v0, s1 2264; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, 5 2265; GFX9-NEXT: v_readfirstlane_b32 s0, v0 2266; GFX9-NEXT: ; return to shader part epilog 2267; 2268; GFX10-LABEL: s_fshr_i32_5: 2269; GFX10: ; %bb.0: 2270; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, 5 2271; GFX10-NEXT: v_readfirstlane_b32 s0, v0 2272; GFX10-NEXT: ; return to shader part epilog 2273 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 5) 2274 ret i32 %result 2275} 2276 2277define amdgpu_ps i32 @s_fshr_i32_8(i32 inreg %lhs, i32 inreg %rhs) { 2278; GFX6-LABEL: s_fshr_i32_8: 2279; GFX6: ; %bb.0: 2280; GFX6-NEXT: v_mov_b32_e32 v0, s1 2281; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, 8 2282; GFX6-NEXT: v_readfirstlane_b32 s0, v0 2283; GFX6-NEXT: ; return to shader part epilog 2284; 2285; GFX8-LABEL: s_fshr_i32_8: 2286; GFX8: ; %bb.0: 2287; GFX8-NEXT: v_mov_b32_e32 v0, s1 2288; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, 8 2289; GFX8-NEXT: v_readfirstlane_b32 s0, v0 2290; GFX8-NEXT: ; return to shader part epilog 2291; 2292; GFX9-LABEL: s_fshr_i32_8: 2293; GFX9: ; %bb.0: 2294; GFX9-NEXT: v_mov_b32_e32 v0, s1 2295; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, 8 2296; GFX9-NEXT: v_readfirstlane_b32 s0, v0 2297; GFX9-NEXT: ; return to shader part epilog 2298; 2299; GFX10-LABEL: s_fshr_i32_8: 2300; GFX10: ; %bb.0: 2301; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, 8 2302; GFX10-NEXT: v_readfirstlane_b32 s0, v0 2303; GFX10-NEXT: ; return to shader part epilog 2304 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 8) 2305 ret i32 %result 2306} 2307 2308define i32 @v_fshr_i32(i32 %lhs, i32 %rhs, i32 %amt) { 2309; 
GFX6-LABEL: v_fshr_i32: 2310; GFX6: ; %bb.0: 2311; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2312; GFX6-NEXT: v_alignbit_b32 v0, v0, v1, v2 2313; GFX6-NEXT: s_setpc_b64 s[30:31] 2314; 2315; GFX8-LABEL: v_fshr_i32: 2316; GFX8: ; %bb.0: 2317; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2318; GFX8-NEXT: v_alignbit_b32 v0, v0, v1, v2 2319; GFX8-NEXT: s_setpc_b64 s[30:31] 2320; 2321; GFX9-LABEL: v_fshr_i32: 2322; GFX9: ; %bb.0: 2323; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2324; GFX9-NEXT: v_alignbit_b32 v0, v0, v1, v2 2325; GFX9-NEXT: s_setpc_b64 s[30:31] 2326; 2327; GFX10-LABEL: v_fshr_i32: 2328; GFX10: ; %bb.0: 2329; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2330; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2331; GFX10-NEXT: v_alignbit_b32 v0, v0, v1, v2 2332; GFX10-NEXT: s_setpc_b64 s[30:31] 2333 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt) 2334 ret i32 %result 2335} 2336 2337define i32 @v_fshr_i32_5(i32 %lhs, i32 %rhs) { 2338; GFX6-LABEL: v_fshr_i32_5: 2339; GFX6: ; %bb.0: 2340; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2341; GFX6-NEXT: v_alignbit_b32 v0, v0, v1, 5 2342; GFX6-NEXT: s_setpc_b64 s[30:31] 2343; 2344; GFX8-LABEL: v_fshr_i32_5: 2345; GFX8: ; %bb.0: 2346; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2347; GFX8-NEXT: v_alignbit_b32 v0, v0, v1, 5 2348; GFX8-NEXT: s_setpc_b64 s[30:31] 2349; 2350; GFX9-LABEL: v_fshr_i32_5: 2351; GFX9: ; %bb.0: 2352; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2353; GFX9-NEXT: v_alignbit_b32 v0, v0, v1, 5 2354; GFX9-NEXT: s_setpc_b64 s[30:31] 2355; 2356; GFX10-LABEL: v_fshr_i32_5: 2357; GFX10: ; %bb.0: 2358; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2359; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2360; GFX10-NEXT: v_alignbit_b32 v0, v0, v1, 5 2361; GFX10-NEXT: s_setpc_b64 s[30:31] 2362 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 5) 2363 ret i32 %result 2364} 2365 2366define i32 @v_fshr_i32_8(i32 %lhs, i32 %rhs) { 2367; GFX6-LABEL: 
v_fshr_i32_8: 2368; GFX6: ; %bb.0: 2369; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2370; GFX6-NEXT: v_alignbit_b32 v0, v0, v1, 8 2371; GFX6-NEXT: s_setpc_b64 s[30:31] 2372; 2373; GFX8-LABEL: v_fshr_i32_8: 2374; GFX8: ; %bb.0: 2375; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2376; GFX8-NEXT: v_alignbit_b32 v0, v0, v1, 8 2377; GFX8-NEXT: s_setpc_b64 s[30:31] 2378; 2379; GFX9-LABEL: v_fshr_i32_8: 2380; GFX9: ; %bb.0: 2381; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2382; GFX9-NEXT: v_alignbit_b32 v0, v0, v1, 8 2383; GFX9-NEXT: s_setpc_b64 s[30:31] 2384; 2385; GFX10-LABEL: v_fshr_i32_8: 2386; GFX10: ; %bb.0: 2387; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2388; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2389; GFX10-NEXT: v_alignbit_b32 v0, v0, v1, 8 2390; GFX10-NEXT: s_setpc_b64 s[30:31] 2391 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 8) 2392 ret i32 %result 2393} 2394 2395define amdgpu_ps float @v_fshr_i32_ssv(i32 inreg %lhs, i32 inreg %rhs, i32 %amt) { 2396; GFX6-LABEL: v_fshr_i32_ssv: 2397; GFX6: ; %bb.0: 2398; GFX6-NEXT: v_mov_b32_e32 v1, s1 2399; GFX6-NEXT: v_alignbit_b32 v0, s0, v1, v0 2400; GFX6-NEXT: ; return to shader part epilog 2401; 2402; GFX8-LABEL: v_fshr_i32_ssv: 2403; GFX8: ; %bb.0: 2404; GFX8-NEXT: v_mov_b32_e32 v1, s1 2405; GFX8-NEXT: v_alignbit_b32 v0, s0, v1, v0 2406; GFX8-NEXT: ; return to shader part epilog 2407; 2408; GFX9-LABEL: v_fshr_i32_ssv: 2409; GFX9: ; %bb.0: 2410; GFX9-NEXT: v_mov_b32_e32 v1, s1 2411; GFX9-NEXT: v_alignbit_b32 v0, s0, v1, v0 2412; GFX9-NEXT: ; return to shader part epilog 2413; 2414; GFX10-LABEL: v_fshr_i32_ssv: 2415; GFX10: ; %bb.0: 2416; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, v0 2417; GFX10-NEXT: ; return to shader part epilog 2418 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt) 2419 %cast.result = bitcast i32 %result to float 2420 ret float %cast.result 2421} 2422 2423define amdgpu_ps float @v_fshr_i32_svs(i32 inreg %lhs, i32 %rhs, i32 inreg %amt) { 2424; 
GFX6-LABEL: v_fshr_i32_svs: 2425; GFX6: ; %bb.0: 2426; GFX6-NEXT: v_mov_b32_e32 v1, s1 2427; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, v1 2428; GFX6-NEXT: ; return to shader part epilog 2429; 2430; GFX8-LABEL: v_fshr_i32_svs: 2431; GFX8: ; %bb.0: 2432; GFX8-NEXT: v_mov_b32_e32 v1, s1 2433; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, v1 2434; GFX8-NEXT: ; return to shader part epilog 2435; 2436; GFX9-LABEL: v_fshr_i32_svs: 2437; GFX9: ; %bb.0: 2438; GFX9-NEXT: v_mov_b32_e32 v1, s1 2439; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, v1 2440; GFX9-NEXT: ; return to shader part epilog 2441; 2442; GFX10-LABEL: v_fshr_i32_svs: 2443; GFX10: ; %bb.0: 2444; GFX10-NEXT: v_alignbit_b32 v0, s0, v0, s1 2445; GFX10-NEXT: ; return to shader part epilog 2446 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt) 2447 %cast.result = bitcast i32 %result to float 2448 ret float %cast.result 2449} 2450 ; NOTE(review): despite the _vss suffix, all three arguments of @v_fshr_i32_vss below are marked inreg, and the expected output reads them from s0, s1 and s2, so no VGPR %lhs operand is exercised by this test. The sibling _ssv and _svs tests drop inreg from the operand their suffix marks with v; presumably %lhs was meant to be a plain i32 here - confirm the intent, then change the signature and regenerate the assertions with utils/update_llc_test_checks.py rather than editing them by hand. 2451define amdgpu_ps float @v_fshr_i32_vss(i32 inreg %lhs, i32 inreg %rhs, i32 inreg %amt) { 2452; GFX6-LABEL: v_fshr_i32_vss: 2453; GFX6: ; %bb.0: 2454; GFX6-NEXT: v_mov_b32_e32 v0, s1 2455; GFX6-NEXT: v_mov_b32_e32 v1, s2 2456; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, v1 2457; GFX6-NEXT: ; return to shader part epilog 2458; 2459; GFX8-LABEL: v_fshr_i32_vss: 2460; GFX8: ; %bb.0: 2461; GFX8-NEXT: v_mov_b32_e32 v0, s1 2462; GFX8-NEXT: v_mov_b32_e32 v1, s2 2463; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, v1 2464; GFX8-NEXT: ; return to shader part epilog 2465; 2466; GFX9-LABEL: v_fshr_i32_vss: 2467; GFX9: ; %bb.0: 2468; GFX9-NEXT: v_mov_b32_e32 v0, s1 2469; GFX9-NEXT: v_mov_b32_e32 v1, s2 2470; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, v1 2471; GFX9-NEXT: ; return to shader part epilog 2472; 2473; GFX10-LABEL: v_fshr_i32_vss: 2474; GFX10: ; %bb.0: 2475; GFX10-NEXT: v_mov_b32_e32 v0, s2 2476; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, v0 2477; GFX10-NEXT: ; return to shader part epilog 2478 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt) 2479 %cast.result = bitcast i32 %result to float 2480 ret 
float %cast.result 2481} 2482 2483define <2 x i32> @v_fshr_v2i32(<2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> %amt) { 2484; GFX6-LABEL: v_fshr_v2i32: 2485; GFX6: ; %bb.0: 2486; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2487; GFX6-NEXT: v_alignbit_b32 v0, v0, v2, v4 2488; GFX6-NEXT: v_alignbit_b32 v1, v1, v3, v5 2489; GFX6-NEXT: s_setpc_b64 s[30:31] 2490; 2491; GFX8-LABEL: v_fshr_v2i32: 2492; GFX8: ; %bb.0: 2493; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2494; GFX8-NEXT: v_alignbit_b32 v0, v0, v2, v4 2495; GFX8-NEXT: v_alignbit_b32 v1, v1, v3, v5 2496; GFX8-NEXT: s_setpc_b64 s[30:31] 2497; 2498; GFX9-LABEL: v_fshr_v2i32: 2499; GFX9: ; %bb.0: 2500; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2501; GFX9-NEXT: v_alignbit_b32 v0, v0, v2, v4 2502; GFX9-NEXT: v_alignbit_b32 v1, v1, v3, v5 2503; GFX9-NEXT: s_setpc_b64 s[30:31] 2504; 2505; GFX10-LABEL: v_fshr_v2i32: 2506; GFX10: ; %bb.0: 2507; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2508; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2509; GFX10-NEXT: v_alignbit_b32 v0, v0, v2, v4 2510; GFX10-NEXT: v_alignbit_b32 v1, v1, v3, v5 2511; GFX10-NEXT: s_setpc_b64 s[30:31] 2512 %result = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> %amt) 2513 ret <2 x i32> %result 2514} 2515 ; The <2 x i32> test above and the <3 x i32> test below both expect the vector fshr to come out as one v_alignbit_b32 per element, pairing the matching lhs, rhs and amt argument registers. 2516define <3 x i32> @v_fshr_v3i32(<3 x i32> %lhs, <3 x i32> %rhs, <3 x i32> %amt) { 2517; GFX6-LABEL: v_fshr_v3i32: 2518; GFX6: ; %bb.0: 2519; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2520; GFX6-NEXT: v_alignbit_b32 v0, v0, v3, v6 2521; GFX6-NEXT: v_alignbit_b32 v1, v1, v4, v7 2522; GFX6-NEXT: v_alignbit_b32 v2, v2, v5, v8 2523; GFX6-NEXT: s_setpc_b64 s[30:31] 2524; 2525; GFX8-LABEL: v_fshr_v3i32: 2526; GFX8: ; %bb.0: 2527; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2528; GFX8-NEXT: v_alignbit_b32 v0, v0, v3, v6 2529; GFX8-NEXT: v_alignbit_b32 v1, v1, v4, v7 2530; GFX8-NEXT: v_alignbit_b32 v2, v2, v5, v8 2531; GFX8-NEXT: s_setpc_b64 s[30:31] 2532; 2533; GFX9-LABEL: v_fshr_v3i32: 2534; 
GFX9: ; %bb.0: 2535; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2536; GFX9-NEXT: v_alignbit_b32 v0, v0, v3, v6 2537; GFX9-NEXT: v_alignbit_b32 v1, v1, v4, v7 2538; GFX9-NEXT: v_alignbit_b32 v2, v2, v5, v8 2539; GFX9-NEXT: s_setpc_b64 s[30:31] 2540; 2541; GFX10-LABEL: v_fshr_v3i32: 2542; GFX10: ; %bb.0: 2543; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2544; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2545; GFX10-NEXT: v_alignbit_b32 v0, v0, v3, v6 2546; GFX10-NEXT: v_alignbit_b32 v1, v1, v4, v7 2547; GFX10-NEXT: v_alignbit_b32 v2, v2, v5, v8 2548; GFX10-NEXT: s_setpc_b64 s[30:31] 2549 %result = call <3 x i32> @llvm.fshr.v3i32(<3 x i32> %lhs, <3 x i32> %rhs, <3 x i32> %amt) 2550 ret <3 x i32> %result 2551} 2552 2553define <4 x i32> @v_fshr_v4i32(<4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> %amt) { 2554; GFX6-LABEL: v_fshr_v4i32: 2555; GFX6: ; %bb.0: 2556; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2557; GFX6-NEXT: v_alignbit_b32 v0, v0, v4, v8 2558; GFX6-NEXT: v_alignbit_b32 v1, v1, v5, v9 2559; GFX6-NEXT: v_alignbit_b32 v2, v2, v6, v10 2560; GFX6-NEXT: v_alignbit_b32 v3, v3, v7, v11 2561; GFX6-NEXT: s_setpc_b64 s[30:31] 2562; 2563; GFX8-LABEL: v_fshr_v4i32: 2564; GFX8: ; %bb.0: 2565; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2566; GFX8-NEXT: v_alignbit_b32 v0, v0, v4, v8 2567; GFX8-NEXT: v_alignbit_b32 v1, v1, v5, v9 2568; GFX8-NEXT: v_alignbit_b32 v2, v2, v6, v10 2569; GFX8-NEXT: v_alignbit_b32 v3, v3, v7, v11 2570; GFX8-NEXT: s_setpc_b64 s[30:31] 2571; 2572; GFX9-LABEL: v_fshr_v4i32: 2573; GFX9: ; %bb.0: 2574; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2575; GFX9-NEXT: v_alignbit_b32 v0, v0, v4, v8 2576; GFX9-NEXT: v_alignbit_b32 v1, v1, v5, v9 2577; GFX9-NEXT: v_alignbit_b32 v2, v2, v6, v10 2578; GFX9-NEXT: v_alignbit_b32 v3, v3, v7, v11 2579; GFX9-NEXT: s_setpc_b64 s[30:31] 2580; 2581; GFX10-LABEL: v_fshr_v4i32: 2582; GFX10: ; %bb.0: 2583; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2584; GFX10-NEXT: s_waitcnt_vscnt null, 
0x0 2585; GFX10-NEXT: v_alignbit_b32 v0, v0, v4, v8 2586; GFX10-NEXT: v_alignbit_b32 v1, v1, v5, v9 2587; GFX10-NEXT: v_alignbit_b32 v2, v2, v6, v10 2588; GFX10-NEXT: v_alignbit_b32 v3, v3, v7, v11 2589; GFX10-NEXT: s_setpc_b64 s[30:31] 2590 %result = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> %amt) 2591 ret <4 x i32> %result 2592} 2593 2594define amdgpu_ps i16 @s_fshr_i16(i16 inreg %lhs, i16 inreg %rhs, i16 inreg %amt) { 2595; GFX6-LABEL: s_fshr_i16: 2596; GFX6: ; %bb.0: 2597; GFX6-NEXT: s_and_b32 s3, s2, 15 2598; GFX6-NEXT: s_andn2_b32 s2, 15, s2 2599; GFX6-NEXT: s_lshl_b32 s0, s0, 1 2600; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000 2601; GFX6-NEXT: s_lshl_b32 s0, s0, s2 2602; GFX6-NEXT: s_bfe_u32 s2, s3, 0x100000 2603; GFX6-NEXT: s_and_b32 s1, s1, 0xffff 2604; GFX6-NEXT: s_lshr_b32 s1, s1, s2 2605; GFX6-NEXT: s_or_b32 s0, s0, s1 2606; GFX6-NEXT: ; return to shader part epilog 2607; 2608; GFX8-LABEL: s_fshr_i16: 2609; GFX8: ; %bb.0: 2610; GFX8-NEXT: s_and_b32 s3, s2, 15 2611; GFX8-NEXT: s_andn2_b32 s2, 15, s2 2612; GFX8-NEXT: s_bfe_u32 s4, 1, 0x100000 2613; GFX8-NEXT: s_lshl_b32 s0, s0, s4 2614; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 2615; GFX8-NEXT: s_lshl_b32 s0, s0, s2 2616; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 2617; GFX8-NEXT: s_bfe_u32 s2, s3, 0x100000 2618; GFX8-NEXT: s_lshr_b32 s1, s1, s2 2619; GFX8-NEXT: s_or_b32 s0, s0, s1 2620; GFX8-NEXT: ; return to shader part epilog 2621; 2622; GFX9-LABEL: s_fshr_i16: 2623; GFX9: ; %bb.0: 2624; GFX9-NEXT: s_and_b32 s3, s2, 15 2625; GFX9-NEXT: s_andn2_b32 s2, 15, s2 2626; GFX9-NEXT: s_bfe_u32 s4, 1, 0x100000 2627; GFX9-NEXT: s_lshl_b32 s0, s0, s4 2628; GFX9-NEXT: s_bfe_u32 s2, s2, 0x100000 2629; GFX9-NEXT: s_lshl_b32 s0, s0, s2 2630; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 2631; GFX9-NEXT: s_bfe_u32 s2, s3, 0x100000 2632; GFX9-NEXT: s_lshr_b32 s1, s1, s2 2633; GFX9-NEXT: s_or_b32 s0, s0, s1 2634; GFX9-NEXT: ; return to shader part epilog 2635; 2636; GFX10-LABEL: s_fshr_i16: 2637; GFX10: ; %bb.0: 
2638; GFX10-NEXT: s_and_b32 s3, s2, 15 2639; GFX10-NEXT: s_bfe_u32 s4, 1, 0x100000 2640; GFX10-NEXT: s_andn2_b32 s2, 15, s2 2641; GFX10-NEXT: s_lshl_b32 s0, s0, s4 2642; GFX10-NEXT: s_bfe_u32 s2, s2, 0x100000 2643; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 2644; GFX10-NEXT: s_bfe_u32 s3, s3, 0x100000 2645; GFX10-NEXT: s_lshl_b32 s0, s0, s2 2646; GFX10-NEXT: s_lshr_b32 s1, s1, s3 2647; GFX10-NEXT: s_or_b32 s0, s0, s1 2648; GFX10-NEXT: ; return to shader part epilog 2649 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt) 2650 ret i16 %result 2651} 2652 2653define amdgpu_ps i16 @s_fshr_i16_4(i16 inreg %lhs, i16 inreg %rhs) { 2654; GFX6-LABEL: s_fshr_i16_4: 2655; GFX6: ; %bb.0: 2656; GFX6-NEXT: s_lshl_b32 s0, s0, 12 2657; GFX6-NEXT: s_bfe_u32 s1, s1, 0xc0004 2658; GFX6-NEXT: s_or_b32 s0, s0, s1 2659; GFX6-NEXT: ; return to shader part epilog 2660; 2661; GFX8-LABEL: s_fshr_i16_4: 2662; GFX8: ; %bb.0: 2663; GFX8-NEXT: s_bfe_u32 s2, 12, 0x100000 2664; GFX8-NEXT: s_lshl_b32 s0, s0, s2 2665; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 2666; GFX8-NEXT: s_bfe_u32 s2, 4, 0x100000 2667; GFX8-NEXT: s_lshr_b32 s1, s1, s2 2668; GFX8-NEXT: s_or_b32 s0, s0, s1 2669; GFX8-NEXT: ; return to shader part epilog 2670; 2671; GFX9-LABEL: s_fshr_i16_4: 2672; GFX9: ; %bb.0: 2673; GFX9-NEXT: s_bfe_u32 s2, 12, 0x100000 2674; GFX9-NEXT: s_lshl_b32 s0, s0, s2 2675; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 2676; GFX9-NEXT: s_bfe_u32 s2, 4, 0x100000 2677; GFX9-NEXT: s_lshr_b32 s1, s1, s2 2678; GFX9-NEXT: s_or_b32 s0, s0, s1 2679; GFX9-NEXT: ; return to shader part epilog 2680; 2681; GFX10-LABEL: s_fshr_i16_4: 2682; GFX10: ; %bb.0: 2683; GFX10-NEXT: s_bfe_u32 s2, 12, 0x100000 2684; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 2685; GFX10-NEXT: s_bfe_u32 s3, 4, 0x100000 2686; GFX10-NEXT: s_lshl_b32 s0, s0, s2 2687; GFX10-NEXT: s_lshr_b32 s1, s1, s3 2688; GFX10-NEXT: s_or_b32 s0, s0, s1 2689; GFX10-NEXT: ; return to shader part epilog 2690 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 4) 2691 
ret i16 %result 2692} 2693 2694define amdgpu_ps i16 @s_fshr_i16_5(i16 inreg %lhs, i16 inreg %rhs) { 2695; GFX6-LABEL: s_fshr_i16_5: 2696; GFX6: ; %bb.0: 2697; GFX6-NEXT: s_lshl_b32 s0, s0, 11 2698; GFX6-NEXT: s_bfe_u32 s1, s1, 0xb0005 2699; GFX6-NEXT: s_or_b32 s0, s0, s1 2700; GFX6-NEXT: ; return to shader part epilog 2701; 2702; GFX8-LABEL: s_fshr_i16_5: 2703; GFX8: ; %bb.0: 2704; GFX8-NEXT: s_bfe_u32 s2, 11, 0x100000 2705; GFX8-NEXT: s_lshl_b32 s0, s0, s2 2706; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 2707; GFX8-NEXT: s_bfe_u32 s2, 5, 0x100000 2708; GFX8-NEXT: s_lshr_b32 s1, s1, s2 2709; GFX8-NEXT: s_or_b32 s0, s0, s1 2710; GFX8-NEXT: ; return to shader part epilog 2711; 2712; GFX9-LABEL: s_fshr_i16_5: 2713; GFX9: ; %bb.0: 2714; GFX9-NEXT: s_bfe_u32 s2, 11, 0x100000 2715; GFX9-NEXT: s_lshl_b32 s0, s0, s2 2716; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 2717; GFX9-NEXT: s_bfe_u32 s2, 5, 0x100000 2718; GFX9-NEXT: s_lshr_b32 s1, s1, s2 2719; GFX9-NEXT: s_or_b32 s0, s0, s1 2720; GFX9-NEXT: ; return to shader part epilog 2721; 2722; GFX10-LABEL: s_fshr_i16_5: 2723; GFX10: ; %bb.0: 2724; GFX10-NEXT: s_bfe_u32 s2, 11, 0x100000 2725; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 2726; GFX10-NEXT: s_bfe_u32 s3, 5, 0x100000 2727; GFX10-NEXT: s_lshl_b32 s0, s0, s2 2728; GFX10-NEXT: s_lshr_b32 s1, s1, s3 2729; GFX10-NEXT: s_or_b32 s0, s0, s1 2730; GFX10-NEXT: ; return to shader part epilog 2731 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 5) 2732 ret i16 %result 2733} 2734 2735define i16 @v_fshr_i16(i16 %lhs, i16 %rhs, i16 %amt) { 2736; GFX6-LABEL: v_fshr_i16: 2737; GFX6: ; %bb.0: 2738; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2739; GFX6-NEXT: v_and_b32_e32 v3, 15, v2 2740; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2 2741; GFX6-NEXT: v_and_b32_e32 v2, 15, v2 2742; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 2743; GFX6-NEXT: v_bfe_u32 v2, v2, 0, 16 2744; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0 2745; GFX6-NEXT: v_bfe_u32 v2, v3, 0, 16 2746; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 
2747; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1 2748; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 2749; GFX6-NEXT: s_setpc_b64 s[30:31] 2750; 2751; GFX8-LABEL: v_fshr_i16: 2752; GFX8: ; %bb.0: 2753; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2754; GFX8-NEXT: v_and_b32_e32 v3, 15, v2 2755; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2 2756; GFX8-NEXT: v_and_b32_e32 v2, 15, v2 2757; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0 2758; GFX8-NEXT: v_lshlrev_b16_e32 v0, v2, v0 2759; GFX8-NEXT: v_lshrrev_b16_e32 v1, v3, v1 2760; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 2761; GFX8-NEXT: s_setpc_b64 s[30:31] 2762; 2763; GFX9-LABEL: v_fshr_i16: 2764; GFX9: ; %bb.0: 2765; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2766; GFX9-NEXT: v_and_b32_e32 v3, 15, v2 2767; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2 2768; GFX9-NEXT: v_and_b32_e32 v2, 15, v2 2769; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0 2770; GFX9-NEXT: v_lshlrev_b16_e32 v0, v2, v0 2771; GFX9-NEXT: v_lshrrev_b16_e32 v1, v3, v1 2772; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 2773; GFX9-NEXT: s_setpc_b64 s[30:31] 2774; 2775; GFX10-LABEL: v_fshr_i16: 2776; GFX10: ; %bb.0: 2777; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2778; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2779; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2 2780; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 2781; GFX10-NEXT: v_and_b32_e32 v2, 15, v2 2782; GFX10-NEXT: v_and_b32_e32 v3, 15, v3 2783; GFX10-NEXT: v_lshrrev_b16 v1, v2, v1 2784; GFX10-NEXT: v_lshlrev_b16 v0, v3, v0 2785; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 2786; GFX10-NEXT: s_setpc_b64 s[30:31] 2787 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt) 2788 ret i16 %result 2789} 2790 2791define i16 @v_fshr_i16_4(i16 %lhs, i16 %rhs) { 2792; GFX6-LABEL: v_fshr_i16_4: 2793; GFX6: ; %bb.0: 2794; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2795; GFX6-NEXT: v_lshlrev_b32_e32 v0, 12, v0 2796; GFX6-NEXT: v_bfe_u32 v1, v1, 4, 12 2797; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 2798; GFX6-NEXT: s_setpc_b64 s[30:31] 2799; 2800; GFX8-LABEL: 
v_fshr_i16_4: 2801; GFX8: ; %bb.0: 2802; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2803; GFX8-NEXT: v_lshlrev_b16_e32 v0, 12, v0 2804; GFX8-NEXT: v_lshrrev_b16_e32 v1, 4, v1 2805; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 2806; GFX8-NEXT: s_setpc_b64 s[30:31] 2807; 2808; GFX9-LABEL: v_fshr_i16_4: 2809; GFX9: ; %bb.0: 2810; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2811; GFX9-NEXT: v_lshlrev_b16_e32 v0, 12, v0 2812; GFX9-NEXT: v_lshrrev_b16_e32 v1, 4, v1 2813; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 2814; GFX9-NEXT: s_setpc_b64 s[30:31] 2815; 2816; GFX10-LABEL: v_fshr_i16_4: 2817; GFX10: ; %bb.0: 2818; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2819; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2820; GFX10-NEXT: v_lshlrev_b16 v0, 12, v0 2821; GFX10-NEXT: v_lshrrev_b16 v1, 4, v1 2822; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 2823; GFX10-NEXT: s_setpc_b64 s[30:31] 2824 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 4) 2825 ret i16 %result 2826} 2827 2828define i16 @v_fshr_i16_5(i16 %lhs, i16 %rhs) { 2829; GFX6-LABEL: v_fshr_i16_5: 2830; GFX6: ; %bb.0: 2831; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2832; GFX6-NEXT: v_lshlrev_b32_e32 v0, 11, v0 2833; GFX6-NEXT: v_bfe_u32 v1, v1, 5, 11 2834; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 2835; GFX6-NEXT: s_setpc_b64 s[30:31] 2836; 2837; GFX8-LABEL: v_fshr_i16_5: 2838; GFX8: ; %bb.0: 2839; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2840; GFX8-NEXT: v_lshlrev_b16_e32 v0, 11, v0 2841; GFX8-NEXT: v_lshrrev_b16_e32 v1, 5, v1 2842; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 2843; GFX8-NEXT: s_setpc_b64 s[30:31] 2844; 2845; GFX9-LABEL: v_fshr_i16_5: 2846; GFX9: ; %bb.0: 2847; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2848; GFX9-NEXT: v_lshlrev_b16_e32 v0, 11, v0 2849; GFX9-NEXT: v_lshrrev_b16_e32 v1, 5, v1 2850; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 2851; GFX9-NEXT: s_setpc_b64 s[30:31] 2852; 2853; GFX10-LABEL: v_fshr_i16_5: 2854; GFX10: ; %bb.0: 2855; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2856; 
GFX10-NEXT: s_waitcnt_vscnt null, 0x0 2857; GFX10-NEXT: v_lshlrev_b16 v0, 11, v0 2858; GFX10-NEXT: v_lshrrev_b16 v1, 5, v1 2859; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 2860; GFX10-NEXT: s_setpc_b64 s[30:31] 2861 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 5) 2862 ret i16 %result 2863} 2864 2865define amdgpu_ps half @v_fshr_i16_ssv(i16 inreg %lhs, i16 inreg %rhs, i16 %amt) { 2866; GFX6-LABEL: v_fshr_i16_ssv: 2867; GFX6: ; %bb.0: 2868; GFX6-NEXT: v_and_b32_e32 v1, 15, v0 2869; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0 2870; GFX6-NEXT: v_and_b32_e32 v0, 15, v0 2871; GFX6-NEXT: s_lshl_b32 s0, s0, 1 2872; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16 2873; GFX6-NEXT: v_lshl_b32_e32 v0, s0, v0 2874; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16 2875; GFX6-NEXT: s_and_b32 s0, s1, 0xffff 2876; GFX6-NEXT: v_lshr_b32_e32 v1, s0, v1 2877; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 2878; GFX6-NEXT: ; return to shader part epilog 2879; 2880; GFX8-LABEL: v_fshr_i16_ssv: 2881; GFX8: ; %bb.0: 2882; GFX8-NEXT: v_and_b32_e32 v1, 15, v0 2883; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0 2884; GFX8-NEXT: s_bfe_u32 s2, 1, 0x100000 2885; GFX8-NEXT: v_and_b32_e32 v0, 15, v0 2886; GFX8-NEXT: s_lshl_b32 s0, s0, s2 2887; GFX8-NEXT: v_lshlrev_b16_e64 v0, v0, s0 2888; GFX8-NEXT: v_lshrrev_b16_e64 v1, v1, s1 2889; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 2890; GFX8-NEXT: ; return to shader part epilog 2891; 2892; GFX9-LABEL: v_fshr_i16_ssv: 2893; GFX9: ; %bb.0: 2894; GFX9-NEXT: v_and_b32_e32 v1, 15, v0 2895; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0 2896; GFX9-NEXT: s_bfe_u32 s2, 1, 0x100000 2897; GFX9-NEXT: v_and_b32_e32 v0, 15, v0 2898; GFX9-NEXT: s_lshl_b32 s0, s0, s2 2899; GFX9-NEXT: v_lshlrev_b16_e64 v0, v0, s0 2900; GFX9-NEXT: v_lshrrev_b16_e64 v1, v1, s1 2901; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 2902; GFX9-NEXT: ; return to shader part epilog 2903; 2904; GFX10-LABEL: v_fshr_i16_ssv: 2905; GFX10: ; %bb.0: 2906; GFX10-NEXT: v_xor_b32_e32 v1, -1, v0 2907; GFX10-NEXT: v_and_b32_e32 v0, 15, v0 2908; GFX10-NEXT: s_bfe_u32 s2, 1, 0x100000 
2909; GFX10-NEXT: s_lshl_b32 s0, s0, s2 2910; GFX10-NEXT: v_and_b32_e32 v1, 15, v1 2911; GFX10-NEXT: v_lshrrev_b16 v0, v0, s1 2912; GFX10-NEXT: v_lshlrev_b16 v1, v1, s0 2913; GFX10-NEXT: v_or_b32_e32 v0, v1, v0 2914; GFX10-NEXT: ; return to shader part epilog 2915 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt) 2916 %cast.result = bitcast i16 %result to half 2917 ret half %cast.result 2918} 2919 2920define amdgpu_ps half @v_fshr_i16_svs(i16 inreg %lhs, i16 %rhs, i16 inreg %amt) { 2921; GFX6-LABEL: v_fshr_i16_svs: 2922; GFX6: ; %bb.0: 2923; GFX6-NEXT: s_and_b32 s2, s1, 15 2924; GFX6-NEXT: s_andn2_b32 s1, 15, s1 2925; GFX6-NEXT: s_lshl_b32 s0, s0, 1 2926; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000 2927; GFX6-NEXT: s_lshl_b32 s0, s0, s1 2928; GFX6-NEXT: s_bfe_u32 s1, s2, 0x100000 2929; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 2930; GFX6-NEXT: v_lshrrev_b32_e32 v0, s1, v0 2931; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 2932; GFX6-NEXT: ; return to shader part epilog 2933; 2934; GFX8-LABEL: v_fshr_i16_svs: 2935; GFX8: ; %bb.0: 2936; GFX8-NEXT: s_and_b32 s2, s1, 15 2937; GFX8-NEXT: s_andn2_b32 s1, 15, s1 2938; GFX8-NEXT: s_bfe_u32 s3, 1, 0x100000 2939; GFX8-NEXT: s_lshl_b32 s0, s0, s3 2940; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 2941; GFX8-NEXT: s_lshl_b32 s0, s0, s1 2942; GFX8-NEXT: v_lshrrev_b16_e32 v0, s2, v0 2943; GFX8-NEXT: v_or_b32_e32 v0, s0, v0 2944; GFX8-NEXT: ; return to shader part epilog 2945; 2946; GFX9-LABEL: v_fshr_i16_svs: 2947; GFX9: ; %bb.0: 2948; GFX9-NEXT: s_and_b32 s2, s1, 15 2949; GFX9-NEXT: s_andn2_b32 s1, 15, s1 2950; GFX9-NEXT: s_bfe_u32 s3, 1, 0x100000 2951; GFX9-NEXT: s_lshl_b32 s0, s0, s3 2952; GFX9-NEXT: s_bfe_u32 s1, s1, 0x100000 2953; GFX9-NEXT: s_lshl_b32 s0, s0, s1 2954; GFX9-NEXT: v_lshrrev_b16_e32 v0, s2, v0 2955; GFX9-NEXT: v_or_b32_e32 v0, s0, v0 2956; GFX9-NEXT: ; return to shader part epilog 2957; 2958; GFX10-LABEL: v_fshr_i16_svs: 2959; GFX10: ; %bb.0: 2960; GFX10-NEXT: s_and_b32 s2, s1, 15 2961; GFX10-NEXT: s_bfe_u32 s3, 1, 
0x100000 2962; GFX10-NEXT: s_andn2_b32 s1, 15, s1 2963; GFX10-NEXT: v_lshrrev_b16 v0, s2, v0 2964; GFX10-NEXT: s_lshl_b32 s0, s0, s3 2965; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 2966; GFX10-NEXT: s_lshl_b32 s0, s0, s1 2967; GFX10-NEXT: v_or_b32_e32 v0, s0, v0 2968; GFX10-NEXT: ; return to shader part epilog 2969 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt) 2970 %cast.result = bitcast i16 %result to half 2971 ret half %cast.result 2972} 2973 2974define amdgpu_ps half @v_fshr_i16_vss(i16 %lhs, i16 inreg %rhs, i16 inreg %amt) { 2975; GFX6-LABEL: v_fshr_i16_vss: 2976; GFX6: ; %bb.0: 2977; GFX6-NEXT: s_and_b32 s2, s1, 15 2978; GFX6-NEXT: s_andn2_b32 s1, 15, s1 2979; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 2980; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000 2981; GFX6-NEXT: v_lshlrev_b32_e32 v0, s1, v0 2982; GFX6-NEXT: s_bfe_u32 s1, s2, 0x100000 2983; GFX6-NEXT: s_and_b32 s0, s0, 0xffff 2984; GFX6-NEXT: s_lshr_b32 s0, s0, s1 2985; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 2986; GFX6-NEXT: ; return to shader part epilog 2987; 2988; GFX8-LABEL: v_fshr_i16_vss: 2989; GFX8: ; %bb.0: 2990; GFX8-NEXT: s_and_b32 s2, s1, 15 2991; GFX8-NEXT: s_andn2_b32 s1, 15, s1 2992; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0 2993; GFX8-NEXT: v_lshlrev_b16_e32 v0, s1, v0 2994; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000 2995; GFX8-NEXT: s_bfe_u32 s1, s2, 0x100000 2996; GFX8-NEXT: s_lshr_b32 s0, s0, s1 2997; GFX8-NEXT: v_or_b32_e32 v0, s0, v0 2998; GFX8-NEXT: ; return to shader part epilog 2999; 3000; GFX9-LABEL: v_fshr_i16_vss: 3001; GFX9: ; %bb.0: 3002; GFX9-NEXT: s_and_b32 s2, s1, 15 3003; GFX9-NEXT: s_andn2_b32 s1, 15, s1 3004; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0 3005; GFX9-NEXT: v_lshlrev_b16_e32 v0, s1, v0 3006; GFX9-NEXT: s_bfe_u32 s0, s0, 0x100000 3007; GFX9-NEXT: s_bfe_u32 s1, s2, 0x100000 3008; GFX9-NEXT: s_lshr_b32 s0, s0, s1 3009; GFX9-NEXT: v_or_b32_e32 v0, s0, v0 3010; GFX9-NEXT: ; return to shader part epilog 3011; 3012; GFX10-LABEL: v_fshr_i16_vss: 3013; GFX10: ; %bb.0: 3014; 
GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 3015; GFX10-NEXT: s_andn2_b32 s2, 15, s1 3016; GFX10-NEXT: s_and_b32 s1, s1, 15 3017; GFX10-NEXT: s_bfe_u32 s0, s0, 0x100000 3018; GFX10-NEXT: s_bfe_u32 s1, s1, 0x100000 3019; GFX10-NEXT: v_lshlrev_b16 v0, s2, v0 3020; GFX10-NEXT: s_lshr_b32 s0, s0, s1 3021; GFX10-NEXT: v_or_b32_e32 v0, s0, v0 3022; GFX10-NEXT: ; return to shader part epilog 3023 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt) 3024 %cast.result = bitcast i16 %result to half 3025 ret half %cast.result 3026} 3027 3028define amdgpu_ps i32 @s_fshr_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <2 x i16> inreg %amt) { 3029; GFX6-LABEL: s_fshr_v2i16: 3030; GFX6: ; %bb.0: 3031; GFX6-NEXT: s_lshl_b32 s5, s5, 16 3032; GFX6-NEXT: s_and_b32 s4, s4, 0xffff 3033; GFX6-NEXT: s_or_b32 s4, s5, s4 3034; GFX6-NEXT: s_bfe_u32 s5, 1, 0x100000 3035; GFX6-NEXT: s_mov_b32 s6, 0xf0001 3036; GFX6-NEXT: s_lshl_b32 s0, s0, s5 3037; GFX6-NEXT: s_bfe_u32 s7, s2, s6 3038; GFX6-NEXT: s_bfe_u32 s8, 14, 0x100000 3039; GFX6-NEXT: s_lshl_b32 s1, s1, s5 3040; GFX6-NEXT: s_bfe_u32 s5, s3, s6 3041; GFX6-NEXT: s_lshr_b32 s7, s7, s8 3042; GFX6-NEXT: s_lshr_b32 s5, s5, s8 3043; GFX6-NEXT: s_xor_b32 s4, s4, -1 3044; GFX6-NEXT: s_or_b32 s0, s0, s7 3045; GFX6-NEXT: s_or_b32 s1, s1, s5 3046; GFX6-NEXT: s_lshl_b32 s2, s2, 1 3047; GFX6-NEXT: s_lshr_b32 s5, s4, 16 3048; GFX6-NEXT: s_and_b32 s7, s4, 15 3049; GFX6-NEXT: s_andn2_b32 s4, 15, s4 3050; GFX6-NEXT: s_bfe_u32 s7, s7, 0x100000 3051; GFX6-NEXT: s_bfe_u32 s2, s2, s6 3052; GFX6-NEXT: s_bfe_u32 s4, s4, 0x100000 3053; GFX6-NEXT: s_lshl_b32 s0, s0, s7 3054; GFX6-NEXT: s_lshr_b32 s2, s2, s4 3055; GFX6-NEXT: s_or_b32 s0, s0, s2 3056; GFX6-NEXT: s_and_b32 s2, s5, 15 3057; GFX6-NEXT: s_lshl_b32 s3, s3, 1 3058; GFX6-NEXT: s_andn2_b32 s4, 15, s5 3059; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000 3060; GFX6-NEXT: s_lshl_b32 s1, s1, s2 3061; GFX6-NEXT: s_bfe_u32 s2, s3, s6 3062; GFX6-NEXT: s_bfe_u32 s3, s4, 0x100000 3063; GFX6-NEXT: s_lshr_b32 s2, s2, s3 
3064; GFX6-NEXT: s_or_b32 s1, s1, s2 3065; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000 3066; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000 3067; GFX6-NEXT: s_lshl_b32 s1, s1, 16 3068; GFX6-NEXT: s_or_b32 s0, s0, s1 3069; GFX6-NEXT: ; return to shader part epilog 3070; 3071; GFX8-LABEL: s_fshr_v2i16: 3072; GFX8: ; %bb.0: 3073; GFX8-NEXT: s_bfe_u32 s5, 1, 0x100000 3074; GFX8-NEXT: s_bfe_u32 s6, s1, 0x100000 3075; GFX8-NEXT: s_bfe_u32 s7, 15, 0x100000 3076; GFX8-NEXT: s_lshr_b32 s3, s0, 16 3077; GFX8-NEXT: s_lshr_b32 s4, s1, 16 3078; GFX8-NEXT: s_lshl_b32 s0, s0, s5 3079; GFX8-NEXT: s_lshr_b32 s6, s6, s7 3080; GFX8-NEXT: s_or_b32 s0, s0, s6 3081; GFX8-NEXT: s_lshl_b32 s3, s3, s5 3082; GFX8-NEXT: s_lshr_b32 s6, s4, s7 3083; GFX8-NEXT: s_lshl_b32 s1, s1, s5 3084; GFX8-NEXT: s_xor_b32 s2, s2, -1 3085; GFX8-NEXT: s_or_b32 s3, s3, s6 3086; GFX8-NEXT: s_lshr_b32 s6, s2, 16 3087; GFX8-NEXT: s_and_b32 s7, s2, 15 3088; GFX8-NEXT: s_andn2_b32 s2, 15, s2 3089; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 3090; GFX8-NEXT: s_bfe_u32 s7, s7, 0x100000 3091; GFX8-NEXT: s_lshr_b32 s1, s1, s5 3092; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 3093; GFX8-NEXT: s_lshl_b32 s0, s0, s7 3094; GFX8-NEXT: s_lshr_b32 s1, s1, s2 3095; GFX8-NEXT: s_or_b32 s0, s0, s1 3096; GFX8-NEXT: s_and_b32 s1, s6, 15 3097; GFX8-NEXT: s_lshl_b32 s4, s4, s5 3098; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 3099; GFX8-NEXT: s_andn2_b32 s2, 15, s6 3100; GFX8-NEXT: s_lshl_b32 s1, s3, s1 3101; GFX8-NEXT: s_bfe_u32 s3, s4, 0x100000 3102; GFX8-NEXT: s_lshr_b32 s3, s3, s5 3103; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 3104; GFX8-NEXT: s_lshr_b32 s2, s3, s2 3105; GFX8-NEXT: s_or_b32 s1, s1, s2 3106; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 3107; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000 3108; GFX8-NEXT: s_lshl_b32 s1, s1, 16 3109; GFX8-NEXT: s_or_b32 s0, s0, s1 3110; GFX8-NEXT: ; return to shader part epilog 3111; 3112; GFX9-LABEL: s_fshr_v2i16: 3113; GFX9: ; %bb.0: 3114; GFX9-NEXT: s_mov_b32 s3, 0xf000f 3115; GFX9-NEXT: s_and_b32 s4, s2, s3 3116; GFX9-NEXT: 
s_andn2_b32 s2, s3, s2 3117; GFX9-NEXT: s_lshr_b32 s3, s0, 16 3118; GFX9-NEXT: s_lshl_b32 s0, s0, 0x10001 3119; GFX9-NEXT: s_lshl_b32 s3, s3, 1 3120; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s3 3121; GFX9-NEXT: s_lshr_b32 s3, s0, 16 3122; GFX9-NEXT: s_lshr_b32 s5, s2, 16 3123; GFX9-NEXT: s_lshl_b32 s0, s0, s2 3124; GFX9-NEXT: s_lshl_b32 s2, s3, s5 3125; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2 3126; GFX9-NEXT: s_lshr_b32 s2, s1, 16 3127; GFX9-NEXT: s_and_b32 s1, s1, 0xffff 3128; GFX9-NEXT: s_lshr_b32 s3, s4, 16 3129; GFX9-NEXT: s_lshr_b32 s1, s1, s4 3130; GFX9-NEXT: s_lshr_b32 s2, s2, s3 3131; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s2 3132; GFX9-NEXT: s_or_b32 s0, s0, s1 3133; GFX9-NEXT: ; return to shader part epilog 3134; 3135; GFX10-LABEL: s_fshr_v2i16: 3136; GFX10: ; %bb.0: 3137; GFX10-NEXT: s_lshr_b32 s4, s0, 16 3138; GFX10-NEXT: s_mov_b32 s3, 0xf000f 3139; GFX10-NEXT: s_lshl_b32 s0, s0, 0x10001 3140; GFX10-NEXT: s_lshl_b32 s4, s4, 1 3141; GFX10-NEXT: s_and_b32 s5, s2, s3 3142; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s4 3143; GFX10-NEXT: s_andn2_b32 s2, s3, s2 3144; GFX10-NEXT: s_lshr_b32 s3, s0, 16 3145; GFX10-NEXT: s_lshr_b32 s4, s2, 16 3146; GFX10-NEXT: s_lshl_b32 s0, s0, s2 3147; GFX10-NEXT: s_lshl_b32 s2, s3, s4 3148; GFX10-NEXT: s_lshr_b32 s3, s1, 16 3149; GFX10-NEXT: s_and_b32 s1, s1, 0xffff 3150; GFX10-NEXT: s_lshr_b32 s4, s5, 16 3151; GFX10-NEXT: s_lshr_b32 s1, s1, s5 3152; GFX10-NEXT: s_lshr_b32 s3, s3, s4 3153; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s2 3154; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s3 3155; GFX10-NEXT: s_or_b32 s0, s0, s1 3156; GFX10-NEXT: ; return to shader part epilog 3157 %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) 3158 %cast = bitcast <2 x i16> %result to i32 3159 ret i32 %cast 3160} 3161 3162define <2 x i16> @v_fshr_v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) { 3163; GFX6-LABEL: v_fshr_v2i16: 3164; GFX6: ; %bb.0: 3165; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3166; GFX6-NEXT: 
v_lshlrev_b32_e32 v5, 16, v5 3167; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4 3168; GFX6-NEXT: v_or_b32_e32 v4, v5, v4 3169; GFX6-NEXT: s_bfe_u32 s4, 1, 0x100000 3170; GFX6-NEXT: v_bfe_u32 v5, v2, 1, 15 3171; GFX6-NEXT: s_bfe_u32 s5, 14, 0x100000 3172; GFX6-NEXT: v_lshlrev_b32_e32 v0, s4, v0 3173; GFX6-NEXT: v_lshrrev_b32_e32 v5, s5, v5 3174; GFX6-NEXT: v_or_b32_e32 v0, v0, v5 3175; GFX6-NEXT: v_bfe_u32 v5, v3, 1, 15 3176; GFX6-NEXT: v_lshlrev_b32_e32 v1, s4, v1 3177; GFX6-NEXT: v_lshrrev_b32_e32 v5, s5, v5 3178; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4 3179; GFX6-NEXT: v_or_b32_e32 v1, v1, v5 3180; GFX6-NEXT: v_lshrrev_b32_e32 v5, 16, v4 3181; GFX6-NEXT: v_and_b32_e32 v6, 15, v4 3182; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4 3183; GFX6-NEXT: v_lshlrev_b32_e32 v2, 1, v2 3184; GFX6-NEXT: v_and_b32_e32 v4, 15, v4 3185; GFX6-NEXT: v_bfe_u32 v6, v6, 0, 16 3186; GFX6-NEXT: v_bfe_u32 v2, v2, 1, 15 3187; GFX6-NEXT: v_bfe_u32 v4, v4, 0, 16 3188; GFX6-NEXT: v_lshlrev_b32_e32 v0, v6, v0 3189; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2 3190; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 3191; GFX6-NEXT: v_and_b32_e32 v2, 15, v5 3192; GFX6-NEXT: v_xor_b32_e32 v4, -1, v5 3193; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3 3194; GFX6-NEXT: v_and_b32_e32 v4, 15, v4 3195; GFX6-NEXT: v_bfe_u32 v2, v2, 0, 16 3196; GFX6-NEXT: v_lshlrev_b32_e32 v1, v2, v1 3197; GFX6-NEXT: v_bfe_u32 v2, v3, 1, 15 3198; GFX6-NEXT: v_bfe_u32 v3, v4, 0, 16 3199; GFX6-NEXT: v_lshrrev_b32_e32 v2, v3, v2 3200; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 3201; GFX6-NEXT: s_setpc_b64 s[30:31] 3202; 3203; GFX8-LABEL: v_fshr_v2i16: 3204; GFX8: ; %bb.0: 3205; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3206; GFX8-NEXT: v_lshlrev_b16_e32 v3, 1, v0 3207; GFX8-NEXT: v_lshrrev_b16_e32 v4, 15, v1 3208; GFX8-NEXT: v_or_b32_e32 v3, v3, v4 3209; GFX8-NEXT: v_mov_b32_e32 v4, 1 3210; GFX8-NEXT: v_mov_b32_e32 v5, 15 3211; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 3212; GFX8-NEXT: 
v_lshrrev_b16_sdwa v5, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 3213; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2 3214; GFX8-NEXT: v_or_b32_e32 v0, v0, v5 3215; GFX8-NEXT: v_lshlrev_b16_e32 v5, 1, v1 3216; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 3217; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v2 3218; GFX8-NEXT: v_and_b32_e32 v6, 15, v2 3219; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2 3220; GFX8-NEXT: v_and_b32_e32 v2, 15, v2 3221; GFX8-NEXT: v_lshrrev_b16_e32 v5, 1, v5 3222; GFX8-NEXT: v_lshlrev_b16_e32 v3, v6, v3 3223; GFX8-NEXT: v_lshrrev_b16_e32 v2, v2, v5 3224; GFX8-NEXT: v_or_b32_e32 v2, v3, v2 3225; GFX8-NEXT: v_and_b32_e32 v3, 15, v4 3226; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4 3227; GFX8-NEXT: v_and_b32_e32 v4, 15, v4 3228; GFX8-NEXT: v_lshrrev_b16_e32 v1, 1, v1 3229; GFX8-NEXT: v_lshlrev_b16_e32 v0, v3, v0 3230; GFX8-NEXT: v_lshrrev_b16_e32 v1, v4, v1 3231; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3232; GFX8-NEXT: v_mov_b32_e32 v1, 16 3233; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 3234; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 3235; GFX8-NEXT: s_setpc_b64 s[30:31] 3236; 3237; GFX9-LABEL: v_fshr_v2i16: 3238; GFX9: ; %bb.0: 3239; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3240; GFX9-NEXT: s_mov_b32 s4, 0xf000f 3241; GFX9-NEXT: v_and_b32_e32 v3, s4, v2 3242; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2 3243; GFX9-NEXT: v_and_b32_e32 v2, s4, v2 3244; GFX9-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] 3245; GFX9-NEXT: v_pk_lshlrev_b16 v0, v2, v0 3246; GFX9-NEXT: v_pk_lshrrev_b16 v1, v3, v1 3247; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 3248; GFX9-NEXT: s_setpc_b64 s[30:31] 3249; 3250; GFX10-LABEL: v_fshr_v2i16: 3251; GFX10: ; %bb.0: 3252; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3253; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3254; GFX10-NEXT: v_xor_b32_e32 
v3, -1, v2 3255; GFX10-NEXT: s_mov_b32 s4, 0xf000f 3256; GFX10-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] 3257; GFX10-NEXT: v_and_b32_e32 v2, s4, v2 3258; GFX10-NEXT: v_and_b32_e32 v3, s4, v3 3259; GFX10-NEXT: v_pk_lshrrev_b16 v1, v2, v1 3260; GFX10-NEXT: v_pk_lshlrev_b16 v0, v3, v0 3261; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 3262; GFX10-NEXT: s_setpc_b64 s[30:31] 3263 %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) 3264 ret <2 x i16> %result 3265} 3266 3267define <2 x i16> @v_fshr_v2i16_4_8(<2 x i16> %lhs, <2 x i16> %rhs) { 3268; GFX6-LABEL: v_fshr_v2i16_4_8: 3269; GFX6: ; %bb.0: 3270; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3271; GFX6-NEXT: s_bfe_u32 s4, 12, 0x100000 3272; GFX6-NEXT: v_lshlrev_b32_e32 v0, s4, v0 3273; GFX6-NEXT: v_bfe_u32 v2, v2, 1, 15 3274; GFX6-NEXT: s_bfe_u32 s4, 3, 0x100000 3275; GFX6-NEXT: v_lshrrev_b32_e32 v2, s4, v2 3276; GFX6-NEXT: s_bfe_u32 s4, 8, 0x100000 3277; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 3278; GFX6-NEXT: v_lshlrev_b32_e32 v1, s4, v1 3279; GFX6-NEXT: v_bfe_u32 v2, v3, 1, 15 3280; GFX6-NEXT: s_bfe_u32 s4, 7, 0x100000 3281; GFX6-NEXT: v_lshrrev_b32_e32 v2, s4, v2 3282; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 3283; GFX6-NEXT: s_setpc_b64 s[30:31] 3284; 3285; GFX8-LABEL: v_fshr_v2i16_4_8: 3286; GFX8: ; %bb.0: 3287; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3288; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v0 3289; GFX8-NEXT: v_lshlrev_b16_e32 v0, 12, v0 3290; GFX8-NEXT: v_lshrrev_b16_e32 v3, 4, v1 3291; GFX8-NEXT: v_or_b32_e32 v0, v0, v3 3292; GFX8-NEXT: v_mov_b32_e32 v3, 8 3293; GFX8-NEXT: v_lshlrev_b16_e32 v2, 8, v2 3294; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 3295; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 3296; GFX8-NEXT: v_mov_b32_e32 v2, 16 3297; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 3298; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 3299; GFX8-NEXT: s_setpc_b64 s[30:31] 3300; 3301; GFX9-LABEL: v_fshr_v2i16_4_8: 3302; GFX9: ; %bb.0: 3303; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3304; GFX9-NEXT: v_mov_b32_e32 v2, 0x8000c 3305; GFX9-NEXT: v_pk_lshlrev_b16 v0, v2, v0 3306; GFX9-NEXT: v_mov_b32_e32 v2, 0x80004 3307; GFX9-NEXT: v_pk_lshrrev_b16 v1, v2, v1 3308; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 3309; GFX9-NEXT: s_setpc_b64 s[30:31] 3310; 3311; GFX10-LABEL: v_fshr_v2i16_4_8: 3312; GFX10: ; %bb.0: 3313; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3314; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 3315; GFX10-NEXT: v_pk_lshlrev_b16 v0, 0x8000c, v0 3316; GFX10-NEXT: v_pk_lshrrev_b16 v1, 0x80004, v1 3317; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 3318; GFX10-NEXT: s_setpc_b64 s[30:31] 3319 %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> <i16 4, i16 8>) 3320 ret <2 x i16> %result 3321} 3322 3323define amdgpu_ps float @v_fshr_v2i16_ssv(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <2 x i16> %amt) { 3324; GFX6-LABEL: v_fshr_v2i16_ssv: 3325; GFX6: ; %bb.0: 3326; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 3327; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 3328; GFX6-NEXT: v_or_b32_e32 v0, v1, v0 3329; GFX6-NEXT: s_mov_b32 s5, 0xf0001 3330; GFX6-NEXT: s_bfe_u32 s4, 1, 0x100000 3331; GFX6-NEXT: s_bfe_u32 s6, s2, s5 3332; GFX6-NEXT: s_bfe_u32 s7, 14, 0x100000 3333; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0 3334; GFX6-NEXT: s_lshl_b32 s0, s0, s4 3335; GFX6-NEXT: s_lshr_b32 s6, s6, s7 3336; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0 3337; GFX6-NEXT: v_and_b32_e32 v2, 15, v0 3338; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0 3339; GFX6-NEXT: s_or_b32 s0, s0, s6 3340; GFX6-NEXT: s_lshl_b32 s2, s2, 1 3341; GFX6-NEXT: v_and_b32_e32 v0, 15, v0 3342; GFX6-NEXT: v_bfe_u32 v2, v2, 0, 16 3343; GFX6-NEXT: v_lshl_b32_e32 v2, s0, v2 3344; GFX6-NEXT: s_bfe_u32 s0, s2, s5 3345; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16 3346; GFX6-NEXT: v_lshr_b32_e32 v0, s0, v0 3347; 
GFX6-NEXT: s_lshl_b32 s1, s1, s4 3348; GFX6-NEXT: s_bfe_u32 s4, s3, s5 3349; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 3350; GFX6-NEXT: v_and_b32_e32 v2, 15, v1 3351; GFX6-NEXT: v_xor_b32_e32 v1, -1, v1 3352; GFX6-NEXT: s_lshr_b32 s4, s4, s7 3353; GFX6-NEXT: s_lshl_b32 s3, s3, 1 3354; GFX6-NEXT: v_and_b32_e32 v1, 15, v1 3355; GFX6-NEXT: s_or_b32 s1, s1, s4 3356; GFX6-NEXT: v_bfe_u32 v2, v2, 0, 16 3357; GFX6-NEXT: s_bfe_u32 s0, s3, s5 3358; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16 3359; GFX6-NEXT: v_lshl_b32_e32 v2, s1, v2 3360; GFX6-NEXT: v_lshr_b32_e32 v1, s0, v1 3361; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 3362; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16 3363; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16 3364; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 3365; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 3366; GFX6-NEXT: ; return to shader part epilog 3367; 3368; GFX8-LABEL: v_fshr_v2i16_ssv: 3369; GFX8: ; %bb.0: 3370; GFX8-NEXT: s_bfe_u32 s4, 1, 0x100000 3371; GFX8-NEXT: s_bfe_u32 s5, s1, 0x100000 3372; GFX8-NEXT: s_bfe_u32 s6, 15, 0x100000 3373; GFX8-NEXT: s_lshr_b32 s2, s0, 16 3374; GFX8-NEXT: s_lshl_b32 s0, s0, s4 3375; GFX8-NEXT: s_lshr_b32 s5, s5, s6 3376; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0 3377; GFX8-NEXT: s_lshr_b32 s3, s1, 16 3378; GFX8-NEXT: s_or_b32 s0, s0, s5 3379; GFX8-NEXT: s_lshl_b32 s1, s1, s4 3380; GFX8-NEXT: v_and_b32_e32 v2, 15, v0 3381; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v0 3382; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0 3383; GFX8-NEXT: v_lshlrev_b16_e64 v2, v2, s0 3384; GFX8-NEXT: s_bfe_u32 s0, s1, 0x100000 3385; GFX8-NEXT: v_and_b32_e32 v0, 15, v0 3386; GFX8-NEXT: s_lshr_b32 s0, s0, s4 3387; GFX8-NEXT: s_lshr_b32 s5, s3, s6 3388; GFX8-NEXT: s_lshl_b32 s3, s3, s4 3389; GFX8-NEXT: v_lshrrev_b16_e64 v0, v0, s0 3390; GFX8-NEXT: s_lshl_b32 s2, s2, s4 3391; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 3392; GFX8-NEXT: v_and_b32_e32 v2, 15, v1 3393; GFX8-NEXT: v_xor_b32_e32 v1, -1, v1 3394; GFX8-NEXT: s_bfe_u32 s0, s3, 0x100000 3395; GFX8-NEXT: s_or_b32 s2, s2, s5 3396; GFX8-NEXT: v_and_b32_e32 v1, 15, v1 
; Layout note: the physical line below first finishes v_fshr_v2i16_ssv (the
; rest of its generated expectations and its IR body: the llvm.fshr.v2i16
; call followed by a bitcast of the <2 x i16> result to float) and then opens
; the test v_fshr_v2i16_svs.
;
; v_fshr_v2i16_svs: same amdgpu_ps signature shape, but here %lhs and %amt are
; marked inreg while %rhs is an ordinary argument.
3397; GFX8-NEXT: s_lshr_b32 s0, s0, s4 3398; GFX8-NEXT: v_lshlrev_b16_e64 v2, v2, s2 3399; GFX8-NEXT: v_lshrrev_b16_e64 v1, v1, s0 3400; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 3401; GFX8-NEXT: v_mov_b32_e32 v2, 16 3402; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 3403; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 3404; GFX8-NEXT: ; return to shader part epilog 3405; 3406; GFX9-LABEL: v_fshr_v2i16_ssv: 3407; GFX9: ; %bb.0: 3408; GFX9-NEXT: s_mov_b32 s2, 0xf000f 3409; GFX9-NEXT: v_and_b32_e32 v1, s2, v0 3410; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0 3411; GFX9-NEXT: v_and_b32_e32 v0, s2, v0 3412; GFX9-NEXT: s_lshr_b32 s2, s0, 16 3413; GFX9-NEXT: s_lshl_b32 s0, s0, 0x10001 3414; GFX9-NEXT: s_lshl_b32 s2, s2, 1 3415; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2 3416; GFX9-NEXT: v_pk_lshlrev_b16 v0, v0, s0 3417; GFX9-NEXT: v_pk_lshrrev_b16 v1, v1, s1 3418; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 3419; GFX9-NEXT: ; return to shader part epilog 3420; 3421; GFX10-LABEL: v_fshr_v2i16_ssv: 3422; GFX10: ; %bb.0: 3423; GFX10-NEXT: v_xor_b32_e32 v1, -1, v0 3424; GFX10-NEXT: s_mov_b32 s2, 0xf000f 3425; GFX10-NEXT: s_lshr_b32 s3, s0, 16 3426; GFX10-NEXT: v_and_b32_e32 v0, s2, v0 3427; GFX10-NEXT: s_lshl_b32 s0, s0, 0x10001 3428; GFX10-NEXT: v_and_b32_e32 v1, s2, v1 3429; GFX10-NEXT: s_lshl_b32 s2, s3, 1 3430; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s2 3431; GFX10-NEXT: v_pk_lshrrev_b16 v0, v0, s1 3432; GFX10-NEXT: v_pk_lshlrev_b16 v1, v1, s0 3433; GFX10-NEXT: v_or_b32_e32 v0, v1, v0 3434; GFX10-NEXT: ; return to shader part epilog 3435 %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) 3436 %cast = bitcast <2 x i16> %result to float 3437 ret float %cast 3438} 3439 3440define amdgpu_ps float @v_fshr_v2i16_svs(<2 x i16> inreg %lhs, <2 x i16> %rhs, <2 x i16> inreg %amt) { 3441; GFX6-LABEL: v_fshr_v2i16_svs: 3442; GFX6: ; %bb.0: 3443; GFX6-NEXT: 
s_lshl_b32 s3, s3, 16 3444; GFX6-NEXT: s_and_b32 s2, s2, 0xffff 3445; GFX6-NEXT: s_or_b32 s2, s3, s2 3446; GFX6-NEXT: s_bfe_u32 s3, 1, 0x100000 3447; GFX6-NEXT: v_bfe_u32 v2, v0, 1, 15 3448; GFX6-NEXT: s_bfe_u32 s4, 14, 0x100000 3449; GFX6-NEXT: s_lshl_b32 s0, s0, s3 3450; GFX6-NEXT: v_lshrrev_b32_e32 v2, s4, v2 3451; GFX6-NEXT: v_bfe_u32 v3, v1, 1, 15 3452; GFX6-NEXT: v_or_b32_e32 v2, s0, v2 3453; GFX6-NEXT: s_lshl_b32 s0, s1, s3 3454; GFX6-NEXT: v_lshrrev_b32_e32 v3, s4, v3 3455; GFX6-NEXT: v_or_b32_e32 v3, s0, v3 3456; GFX6-NEXT: s_xor_b32 s0, s2, -1 3457; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 3458; GFX6-NEXT: s_lshr_b32 s1, s0, 16 3459; GFX6-NEXT: s_and_b32 s2, s0, 15 3460; GFX6-NEXT: s_andn2_b32 s0, 15, s0 3461; GFX6-NEXT: v_bfe_u32 v0, v0, 1, 15 3462; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000 3463; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000 3464; GFX6-NEXT: v_lshrrev_b32_e32 v0, s0, v0 3465; GFX6-NEXT: s_and_b32 s0, s1, 15 3466; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1 3467; GFX6-NEXT: v_lshlrev_b32_e32 v2, s2, v2 3468; GFX6-NEXT: s_andn2_b32 s1, 15, s1 3469; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000 3470; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 3471; GFX6-NEXT: v_lshlrev_b32_e32 v2, s0, v3 3472; GFX6-NEXT: v_bfe_u32 v1, v1, 1, 15 3473; GFX6-NEXT: s_bfe_u32 s0, s1, 0x100000 3474; GFX6-NEXT: v_lshrrev_b32_e32 v1, s0, v1 3475; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 3476; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16 3477; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16 3478; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 3479; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 3480; GFX6-NEXT: ; return to shader part epilog 3481; 3482; GFX8-LABEL: v_fshr_v2i16_svs: 3483; GFX8: ; %bb.0: 3484; GFX8-NEXT: s_bfe_u32 s3, 1, 0x100000 3485; GFX8-NEXT: s_lshr_b32 s2, s0, 16 3486; GFX8-NEXT: s_lshl_b32 s0, s0, s3 3487; GFX8-NEXT: v_lshrrev_b16_e32 v1, 15, v0 3488; GFX8-NEXT: v_mov_b32_e32 v2, 15 3489; GFX8-NEXT: v_or_b32_e32 v1, s0, v1 3490; GFX8-NEXT: s_lshl_b32 s0, s2, s3 3491; GFX8-NEXT: v_lshrrev_b16_sdwa v2, v2, v0 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 3492; GFX8-NEXT: v_or_b32_e32 v2, s0, v2 3493; GFX8-NEXT: v_lshlrev_b16_e32 v3, 1, v0 3494; GFX8-NEXT: v_mov_b32_e32 v4, 1 3495; GFX8-NEXT: s_xor_b32 s0, s1, -1 3496; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 3497; GFX8-NEXT: s_lshr_b32 s1, s0, 16 3498; GFX8-NEXT: s_and_b32 s2, s0, 15 3499; GFX8-NEXT: s_andn2_b32 s0, 15, s0 3500; GFX8-NEXT: v_lshrrev_b16_e32 v3, 1, v3 3501; GFX8-NEXT: v_lshrrev_b16_e32 v3, s0, v3 3502; GFX8-NEXT: s_and_b32 s0, s1, 15 3503; GFX8-NEXT: s_andn2_b32 s1, 15, s1 3504; GFX8-NEXT: v_lshrrev_b16_e32 v0, 1, v0 3505; GFX8-NEXT: v_lshlrev_b16_e32 v2, s0, v2 3506; GFX8-NEXT: v_lshrrev_b16_e32 v0, s1, v0 3507; GFX8-NEXT: v_lshlrev_b16_e32 v1, s2, v1 3508; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 3509; GFX8-NEXT: v_mov_b32_e32 v2, 16 3510; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 3511; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 3512; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 3513; GFX8-NEXT: ; return to shader part epilog 3514; 3515; GFX9-LABEL: v_fshr_v2i16_svs: 3516; GFX9: ; %bb.0: 3517; GFX9-NEXT: s_mov_b32 s2, 0xf000f 3518; GFX9-NEXT: s_and_b32 s3, s1, s2 3519; GFX9-NEXT: s_andn2_b32 s1, s2, s1 3520; GFX9-NEXT: s_lshr_b32 s2, s0, 16 3521; GFX9-NEXT: s_lshl_b32 s0, s0, 0x10001 3522; GFX9-NEXT: s_lshl_b32 s2, s2, 1 3523; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2 3524; GFX9-NEXT: s_lshr_b32 s2, s0, 16 3525; GFX9-NEXT: s_lshr_b32 s4, s1, 16 3526; GFX9-NEXT: s_lshl_b32 s0, s0, s1 3527; GFX9-NEXT: s_lshl_b32 s1, s2, s4 3528; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s1 3529; GFX9-NEXT: v_pk_lshrrev_b16 v0, s3, v0 3530; GFX9-NEXT: v_or_b32_e32 v0, s0, v0 3531; GFX9-NEXT: ; return to shader part epilog 3532; 3533; GFX10-LABEL: v_fshr_v2i16_svs: 3534; GFX10: ; %bb.0: 3535; GFX10-NEXT: s_lshr_b32 s3, s0, 16 3536; GFX10-NEXT: 
; Layout note: the physical line below first finishes v_fshr_v2i16_svs (the
; rest of its generated gfx1010 expectations and its IR body: llvm.fshr.v2i16
; call, bitcast to float, ret) and then opens the test v_fshr_v2i16_vss.
;
; v_fshr_v2i16_vss: %lhs is an ordinary argument while %rhs and %amt are marked
; inreg; the IR body (further down) is the same call + bitcast-to-float pair.
s_mov_b32 s2, 0xf000f 3537; GFX10-NEXT: s_lshl_b32 s0, s0, 0x10001 3538; GFX10-NEXT: s_lshl_b32 s3, s3, 1 3539; GFX10-NEXT: s_and_b32 s4, s1, s2 3540; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s3 3541; GFX10-NEXT: s_andn2_b32 s1, s2, s1 3542; GFX10-NEXT: s_lshr_b32 s2, s0, 16 3543; GFX10-NEXT: s_lshr_b32 s3, s1, 16 3544; GFX10-NEXT: v_pk_lshrrev_b16 v0, s4, v0 3545; GFX10-NEXT: s_lshl_b32 s0, s0, s1 3546; GFX10-NEXT: s_lshl_b32 s1, s2, s3 3547; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s1 3548; GFX10-NEXT: v_or_b32_e32 v0, s0, v0 3549; GFX10-NEXT: ; return to shader part epilog 3550 %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) 3551 %cast = bitcast <2 x i16> %result to float 3552 ret float %cast 3553} 3554 3555define amdgpu_ps float @v_fshr_v2i16_vss(<2 x i16> %lhs, <2 x i16> inreg %rhs, <2 x i16> inreg %amt) { 3556; GFX6-LABEL: v_fshr_v2i16_vss: 3557; GFX6: ; %bb.0: 3558; GFX6-NEXT: s_lshl_b32 s3, s3, 16 3559; GFX6-NEXT: s_and_b32 s2, s2, 0xffff 3560; GFX6-NEXT: s_or_b32 s2, s3, s2 3561; GFX6-NEXT: s_bfe_u32 s3, 1, 0x100000 3562; GFX6-NEXT: s_mov_b32 s4, 0xf0001 3563; GFX6-NEXT: v_lshlrev_b32_e32 v0, s3, v0 3564; GFX6-NEXT: s_bfe_u32 s5, s0, s4 3565; GFX6-NEXT: s_bfe_u32 s6, 14, 0x100000 3566; GFX6-NEXT: v_lshlrev_b32_e32 v1, s3, v1 3567; GFX6-NEXT: s_bfe_u32 s3, s1, s4 3568; GFX6-NEXT: s_lshr_b32 s5, s5, s6 3569; GFX6-NEXT: s_lshr_b32 s3, s3, s6 3570; GFX6-NEXT: s_xor_b32 s2, s2, -1 3571; GFX6-NEXT: v_or_b32_e32 v0, s5, v0 3572; GFX6-NEXT: v_or_b32_e32 v1, s3, v1 3573; GFX6-NEXT: s_lshl_b32 s0, s0, 1 3574; GFX6-NEXT: s_lshr_b32 s3, s2, 16 3575; GFX6-NEXT: s_and_b32 s5, s2, 15 3576; GFX6-NEXT: s_andn2_b32 s2, 15, s2 3577; GFX6-NEXT: s_bfe_u32 s5, s5, 0x100000 3578; GFX6-NEXT: s_bfe_u32 s0, s0, s4 3579; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000 3580; GFX6-NEXT: v_lshlrev_b32_e32 v0, s5, v0 3581; GFX6-NEXT: s_lshr_b32 s0, s0, s2 3582; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 3583; GFX6-NEXT: s_and_b32 s0, s3, 15 3584; GFX6-NEXT: s_lshl_b32 
s1, s1, 1 3585; GFX6-NEXT: s_andn2_b32 s2, 15, s3 3586; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000 3587; GFX6-NEXT: v_lshlrev_b32_e32 v1, s0, v1 3588; GFX6-NEXT: s_bfe_u32 s0, s1, s4 3589; GFX6-NEXT: s_bfe_u32 s1, s2, 0x100000 3590; GFX6-NEXT: s_lshr_b32 s0, s0, s1 3591; GFX6-NEXT: v_or_b32_e32 v1, s0, v1 3592; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 16 3593; GFX6-NEXT: v_bfe_u32 v0, v0, 0, 16 3594; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 3595; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 3596; GFX6-NEXT: ; return to shader part epilog 3597; 3598; GFX8-LABEL: v_fshr_v2i16_vss: 3599; GFX8: ; %bb.0: 3600; GFX8-NEXT: s_bfe_u32 s3, s0, 0x100000 3601; GFX8-NEXT: s_bfe_u32 s4, 15, 0x100000 3602; GFX8-NEXT: s_lshr_b32 s2, s0, 16 3603; GFX8-NEXT: v_lshlrev_b16_e32 v1, 1, v0 3604; GFX8-NEXT: s_lshr_b32 s3, s3, s4 3605; GFX8-NEXT: v_mov_b32_e32 v2, 1 3606; GFX8-NEXT: v_or_b32_e32 v1, s3, v1 3607; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 3608; GFX8-NEXT: s_lshr_b32 s3, s2, s4 3609; GFX8-NEXT: v_or_b32_e32 v0, s3, v0 3610; GFX8-NEXT: s_bfe_u32 s3, 1, 0x100000 3611; GFX8-NEXT: s_lshl_b32 s0, s0, s3 3612; GFX8-NEXT: s_xor_b32 s1, s1, -1 3613; GFX8-NEXT: s_lshr_b32 s4, s1, 16 3614; GFX8-NEXT: s_and_b32 s5, s1, 15 3615; GFX8-NEXT: s_andn2_b32 s1, 15, s1 3616; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000 3617; GFX8-NEXT: s_lshr_b32 s0, s0, s3 3618; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 3619; GFX8-NEXT: v_lshlrev_b16_e32 v1, s5, v1 3620; GFX8-NEXT: s_lshr_b32 s0, s0, s1 3621; GFX8-NEXT: s_lshl_b32 s2, s2, s3 3622; GFX8-NEXT: v_or_b32_e32 v1, s0, v1 3623; GFX8-NEXT: s_and_b32 s0, s4, 15 3624; GFX8-NEXT: s_andn2_b32 s1, 15, s4 3625; GFX8-NEXT: v_lshlrev_b16_e32 v0, s0, v0 3626; GFX8-NEXT: s_bfe_u32 s0, s2, 0x100000 3627; GFX8-NEXT: s_lshr_b32 s0, s0, s3 3628; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 3629; GFX8-NEXT: s_lshr_b32 s0, s0, s1 3630; GFX8-NEXT: v_or_b32_e32 v0, s0, v0 3631; GFX8-NEXT: v_mov_b32_e32 v2, 16 3632; GFX8-NEXT: v_lshlrev_b32_sdwa 
v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 3633; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 3634; GFX8-NEXT: ; return to shader part epilog 3635; 3636; GFX9-LABEL: v_fshr_v2i16_vss: 3637; GFX9: ; %bb.0: 3638; GFX9-NEXT: s_mov_b32 s2, 0xf000f 3639; GFX9-NEXT: s_and_b32 s3, s1, s2 3640; GFX9-NEXT: s_andn2_b32 s1, s2, s1 3641; GFX9-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] 3642; GFX9-NEXT: v_pk_lshlrev_b16 v0, s1, v0 3643; GFX9-NEXT: s_lshr_b32 s1, s0, 16 3644; GFX9-NEXT: s_and_b32 s0, s0, 0xffff 3645; GFX9-NEXT: s_lshr_b32 s2, s3, 16 3646; GFX9-NEXT: s_lshr_b32 s0, s0, s3 3647; GFX9-NEXT: s_lshr_b32 s1, s1, s2 3648; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s1 3649; GFX9-NEXT: v_or_b32_e32 v0, s0, v0 3650; GFX9-NEXT: ; return to shader part epilog 3651; 3652; GFX10-LABEL: v_fshr_v2i16_vss: 3653; GFX10: ; %bb.0: 3654; GFX10-NEXT: s_mov_b32 s2, 0xf000f 3655; GFX10-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] 3656; GFX10-NEXT: s_and_b32 s3, s1, s2 3657; GFX10-NEXT: s_andn2_b32 s1, s2, s1 3658; GFX10-NEXT: s_lshr_b32 s2, s0, 16 3659; GFX10-NEXT: s_and_b32 s0, s0, 0xffff 3660; GFX10-NEXT: s_lshr_b32 s4, s3, 16 3661; GFX10-NEXT: v_pk_lshlrev_b16 v0, s1, v0 3662; GFX10-NEXT: s_lshr_b32 s0, s0, s3 3663; GFX10-NEXT: s_lshr_b32 s1, s2, s4 3664; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s1 3665; GFX10-NEXT: v_or_b32_e32 v0, s0, v0 3666; GFX10-NEXT: ; return to shader part epilog 3667 %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) 3668 %cast = bitcast <2 x i16> %result to float 3669 ret float %cast 3670} 3671 3672; ; FIXME 3673; define amdgpu_ps i48 @s_fshr_v3i16(<3 x i16> inreg %lhs, <3 x i16> inreg %rhs, <3 x i16> inreg %amt) { 3674; %result = call <3 x i16> @llvm.fshr.v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) 3675; %cast = bitcast <3 x i16> %result to i48 3676; ret i48 %cast 3677; } 3678 3679; ; FIXME 3680; define <3 x half> 
; Layout note: the physical line below starts with the remainder of the
; commented-out v_fshr_v3i16 test. Both <3 x i16> tests (s_fshr_v3i16 and
; v_fshr_v3i16) are disabled and tagged FIXME; the file does not record why.
; NOTE(review): presumably v3i16 fshr is not handled yet - confirm before
; re-enabling them.
;
; s_fshr_v4i16: amdgpu_ps test of llvm.fshr.v4i16 with all three <4 x i16>
; operands marked inreg; the result is bitcast to <2 x i32> for the return.
; The expected output for every target uses only s_* instructions.
@v_fshr_v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) { 3681; %result = call <3 x i16> @llvm.fshr.v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) 3682; %cast.result = bitcast <3 x i16> %result to <3 x half> 3683; ret <3 x half> %cast.result 3684; } 3685 3686define amdgpu_ps <2 x i32> @s_fshr_v4i16(<4 x i16> inreg %lhs, <4 x i16> inreg %rhs, <4 x i16> inreg %amt) { 3687; GFX6-LABEL: s_fshr_v4i16: 3688; GFX6: ; %bb.0: 3689; GFX6-NEXT: s_mov_b32 s12, 0xffff 3690; GFX6-NEXT: s_lshl_b32 s9, s9, 16 3691; GFX6-NEXT: s_and_b32 s8, s8, s12 3692; GFX6-NEXT: s_or_b32 s8, s9, s8 3693; GFX6-NEXT: s_lshl_b32 s9, s11, 16 3694; GFX6-NEXT: s_and_b32 s10, s10, s12 3695; GFX6-NEXT: s_mov_b32 s11, 0xf0001 3696; GFX6-NEXT: s_or_b32 s9, s9, s10 3697; GFX6-NEXT: s_bfe_u32 s10, 1, 0x100000 3698; GFX6-NEXT: s_bfe_u32 s12, s4, s11 3699; GFX6-NEXT: s_bfe_u32 s13, 14, 0x100000 3700; GFX6-NEXT: s_lshl_b32 s0, s0, s10 3701; GFX6-NEXT: s_lshr_b32 s12, s12, s13 3702; GFX6-NEXT: s_or_b32 s0, s0, s12 3703; GFX6-NEXT: s_bfe_u32 s12, s5, s11 3704; GFX6-NEXT: s_lshl_b32 s1, s1, s10 3705; GFX6-NEXT: s_lshr_b32 s12, s12, s13 3706; GFX6-NEXT: s_xor_b32 s8, s8, -1 3707; GFX6-NEXT: s_or_b32 s1, s1, s12 3708; GFX6-NEXT: s_lshl_b32 s4, s4, 1 3709; GFX6-NEXT: s_lshr_b32 s12, s8, 16 3710; GFX6-NEXT: s_and_b32 s14, s8, 15 3711; GFX6-NEXT: s_andn2_b32 s8, 15, s8 3712; GFX6-NEXT: s_bfe_u32 s14, s14, 0x100000 3713; GFX6-NEXT: s_bfe_u32 s4, s4, s11 3714; GFX6-NEXT: s_bfe_u32 s8, s8, 0x100000 3715; GFX6-NEXT: s_lshl_b32 s0, s0, s14 3716; GFX6-NEXT: s_lshr_b32 s4, s4, s8 3717; GFX6-NEXT: s_or_b32 s0, s0, s4 3718; GFX6-NEXT: s_and_b32 s4, s12, 15 3719; GFX6-NEXT: s_lshl_b32 s5, s5, 1 3720; GFX6-NEXT: s_andn2_b32 s8, 15, s12 3721; GFX6-NEXT: s_bfe_u32 s4, s4, 0x100000 3722; GFX6-NEXT: s_lshl_b32 s1, s1, s4 3723; GFX6-NEXT: s_bfe_u32 s4, s5, s11 3724; GFX6-NEXT: s_bfe_u32 s5, s8, 0x100000 3725; GFX6-NEXT: s_lshr_b32 s4, s4, s5 3726; GFX6-NEXT: s_or_b32 s1, s1, s4 3727; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000 
3728; GFX6-NEXT: s_bfe_u32 s0, s0, 0x100000 3729; GFX6-NEXT: s_lshl_b32 s1, s1, 16 3730; GFX6-NEXT: s_or_b32 s0, s0, s1 3731; GFX6-NEXT: s_lshl_b32 s1, s2, s10 3732; GFX6-NEXT: s_bfe_u32 s2, s6, s11 3733; GFX6-NEXT: s_lshr_b32 s2, s2, s13 3734; GFX6-NEXT: s_or_b32 s1, s1, s2 3735; GFX6-NEXT: s_lshl_b32 s2, s3, s10 3736; GFX6-NEXT: s_bfe_u32 s3, s7, s11 3737; GFX6-NEXT: s_lshr_b32 s3, s3, s13 3738; GFX6-NEXT: s_xor_b32 s5, s9, -1 3739; GFX6-NEXT: s_or_b32 s2, s2, s3 3740; GFX6-NEXT: s_lshl_b32 s3, s6, 1 3741; GFX6-NEXT: s_lshl_b32 s4, s7, 1 3742; GFX6-NEXT: s_lshr_b32 s6, s5, 16 3743; GFX6-NEXT: s_and_b32 s7, s5, 15 3744; GFX6-NEXT: s_andn2_b32 s5, 15, s5 3745; GFX6-NEXT: s_bfe_u32 s7, s7, 0x100000 3746; GFX6-NEXT: s_bfe_u32 s3, s3, s11 3747; GFX6-NEXT: s_bfe_u32 s5, s5, 0x100000 3748; GFX6-NEXT: s_lshl_b32 s1, s1, s7 3749; GFX6-NEXT: s_lshr_b32 s3, s3, s5 3750; GFX6-NEXT: s_or_b32 s1, s1, s3 3751; GFX6-NEXT: s_and_b32 s3, s6, 15 3752; GFX6-NEXT: s_andn2_b32 s5, 15, s6 3753; GFX6-NEXT: s_bfe_u32 s3, s3, 0x100000 3754; GFX6-NEXT: s_lshl_b32 s2, s2, s3 3755; GFX6-NEXT: s_bfe_u32 s3, s4, s11 3756; GFX6-NEXT: s_bfe_u32 s4, s5, 0x100000 3757; GFX6-NEXT: s_lshr_b32 s3, s3, s4 3758; GFX6-NEXT: s_or_b32 s2, s2, s3 3759; GFX6-NEXT: s_bfe_u32 s2, s2, 0x100000 3760; GFX6-NEXT: s_bfe_u32 s1, s1, 0x100000 3761; GFX6-NEXT: s_lshl_b32 s2, s2, 16 3762; GFX6-NEXT: s_or_b32 s1, s1, s2 3763; GFX6-NEXT: ; return to shader part epilog 3764; 3765; GFX8-LABEL: s_fshr_v4i16: 3766; GFX8: ; %bb.0: 3767; GFX8-NEXT: s_bfe_u32 s8, 1, 0x100000 3768; GFX8-NEXT: s_bfe_u32 s9, s2, 0x100000 3769; GFX8-NEXT: s_bfe_u32 s10, 15, 0x100000 3770; GFX8-NEXT: s_lshr_b32 s6, s0, 16 3771; GFX8-NEXT: s_lshr_b32 s7, s2, 16 3772; GFX8-NEXT: s_lshl_b32 s0, s0, s8 3773; GFX8-NEXT: s_lshr_b32 s9, s9, s10 3774; GFX8-NEXT: s_or_b32 s0, s0, s9 3775; GFX8-NEXT: s_lshl_b32 s6, s6, s8 3776; GFX8-NEXT: s_lshr_b32 s9, s7, s10 3777; GFX8-NEXT: s_lshl_b32 s2, s2, s8 3778; GFX8-NEXT: s_xor_b32 s4, s4, -1 3779; GFX8-NEXT: 
s_or_b32 s6, s6, s9 3780; GFX8-NEXT: s_lshr_b32 s9, s4, 16 3781; GFX8-NEXT: s_and_b32 s11, s4, 15 3782; GFX8-NEXT: s_andn2_b32 s4, 15, s4 3783; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 3784; GFX8-NEXT: s_bfe_u32 s11, s11, 0x100000 3785; GFX8-NEXT: s_lshr_b32 s2, s2, s8 3786; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000 3787; GFX8-NEXT: s_lshl_b32 s0, s0, s11 3788; GFX8-NEXT: s_lshr_b32 s2, s2, s4 3789; GFX8-NEXT: s_or_b32 s0, s0, s2 3790; GFX8-NEXT: s_and_b32 s2, s9, 15 3791; GFX8-NEXT: s_lshl_b32 s7, s7, s8 3792; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 3793; GFX8-NEXT: s_andn2_b32 s4, 15, s9 3794; GFX8-NEXT: s_lshl_b32 s2, s6, s2 3795; GFX8-NEXT: s_bfe_u32 s6, s7, 0x100000 3796; GFX8-NEXT: s_lshr_b32 s6, s6, s8 3797; GFX8-NEXT: s_bfe_u32 s4, s4, 0x100000 3798; GFX8-NEXT: s_lshr_b32 s4, s6, s4 3799; GFX8-NEXT: s_or_b32 s2, s2, s4 3800; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 3801; GFX8-NEXT: s_bfe_u32 s0, s0, 0x100000 3802; GFX8-NEXT: s_lshl_b32 s2, s2, 16 3803; GFX8-NEXT: s_bfe_u32 s6, s3, 0x100000 3804; GFX8-NEXT: s_or_b32 s0, s0, s2 3805; GFX8-NEXT: s_lshr_b32 s2, s1, 16 3806; GFX8-NEXT: s_lshr_b32 s4, s3, 16 3807; GFX8-NEXT: s_lshl_b32 s1, s1, s8 3808; GFX8-NEXT: s_lshr_b32 s6, s6, s10 3809; GFX8-NEXT: s_or_b32 s1, s1, s6 3810; GFX8-NEXT: s_lshl_b32 s2, s2, s8 3811; GFX8-NEXT: s_lshr_b32 s6, s4, s10 3812; GFX8-NEXT: s_lshl_b32 s3, s3, s8 3813; GFX8-NEXT: s_xor_b32 s5, s5, -1 3814; GFX8-NEXT: s_or_b32 s2, s2, s6 3815; GFX8-NEXT: s_lshr_b32 s6, s5, 16 3816; GFX8-NEXT: s_and_b32 s7, s5, 15 3817; GFX8-NEXT: s_andn2_b32 s5, 15, s5 3818; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000 3819; GFX8-NEXT: s_bfe_u32 s7, s7, 0x100000 3820; GFX8-NEXT: s_lshr_b32 s3, s3, s8 3821; GFX8-NEXT: s_bfe_u32 s5, s5, 0x100000 3822; GFX8-NEXT: s_lshl_b32 s1, s1, s7 3823; GFX8-NEXT: s_lshr_b32 s3, s3, s5 3824; GFX8-NEXT: s_or_b32 s1, s1, s3 3825; GFX8-NEXT: s_and_b32 s3, s6, 15 3826; GFX8-NEXT: s_lshl_b32 s4, s4, s8 3827; GFX8-NEXT: s_bfe_u32 s3, s3, 0x100000 3828; GFX8-NEXT: s_andn2_b32 s5, 15, s6 3829; 
GFX8-NEXT: s_lshl_b32 s2, s2, s3 3830; GFX8-NEXT: s_bfe_u32 s3, s4, 0x100000 3831; GFX8-NEXT: s_lshr_b32 s3, s3, s8 3832; GFX8-NEXT: s_bfe_u32 s4, s5, 0x100000 3833; GFX8-NEXT: s_lshr_b32 s3, s3, s4 3834; GFX8-NEXT: s_or_b32 s2, s2, s3 3835; GFX8-NEXT: s_bfe_u32 s2, s2, 0x100000 3836; GFX8-NEXT: s_bfe_u32 s1, s1, 0x100000 3837; GFX8-NEXT: s_lshl_b32 s2, s2, 16 3838; GFX8-NEXT: s_or_b32 s1, s1, s2 3839; GFX8-NEXT: ; return to shader part epilog 3840; 3841; GFX9-LABEL: s_fshr_v4i16: 3842; GFX9: ; %bb.0: 3843; GFX9-NEXT: s_mov_b32 s8, 0x10001 3844; GFX9-NEXT: s_lshr_b32 s9, s0, 16 3845; GFX9-NEXT: s_mov_b32 s6, 0xf000f 3846; GFX9-NEXT: s_lshl_b32 s0, s0, s8 3847; GFX9-NEXT: s_lshl_b32 s9, s9, 1 3848; GFX9-NEXT: s_and_b32 s7, s4, s6 3849; GFX9-NEXT: s_andn2_b32 s4, s6, s4 3850; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s9 3851; GFX9-NEXT: s_lshr_b32 s9, s0, 16 3852; GFX9-NEXT: s_lshr_b32 s10, s4, 16 3853; GFX9-NEXT: s_lshl_b32 s0, s0, s4 3854; GFX9-NEXT: s_lshl_b32 s4, s9, s10 3855; GFX9-NEXT: s_mov_b32 s9, 0xffff 3856; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s4 3857; GFX9-NEXT: s_lshr_b32 s4, s2, 16 3858; GFX9-NEXT: s_and_b32 s2, s2, s9 3859; GFX9-NEXT: s_lshr_b32 s10, s7, 16 3860; GFX9-NEXT: s_lshr_b32 s2, s2, s7 3861; GFX9-NEXT: s_lshr_b32 s4, s4, s10 3862; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s4 3863; GFX9-NEXT: s_or_b32 s0, s0, s2 3864; GFX9-NEXT: s_and_b32 s2, s5, s6 3865; GFX9-NEXT: s_andn2_b32 s4, s6, s5 3866; GFX9-NEXT: s_lshr_b32 s5, s1, 16 3867; GFX9-NEXT: s_lshl_b32 s1, s1, s8 3868; GFX9-NEXT: s_lshl_b32 s5, s5, 1 3869; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s5 3870; GFX9-NEXT: s_lshr_b32 s5, s1, 16 3871; GFX9-NEXT: s_lshr_b32 s6, s4, 16 3872; GFX9-NEXT: s_lshl_b32 s1, s1, s4 3873; GFX9-NEXT: s_lshl_b32 s4, s5, s6 3874; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s4 3875; GFX9-NEXT: s_lshr_b32 s4, s3, 16 3876; GFX9-NEXT: s_and_b32 s3, s3, s9 3877; GFX9-NEXT: s_lshr_b32 s5, s2, 16 3878; GFX9-NEXT: s_lshr_b32 s2, s3, s2 3879; GFX9-NEXT: s_lshr_b32 s3, s4, s5 3880; GFX9-NEXT: 
s_pack_ll_b32_b16 s2, s2, s3 3881; GFX9-NEXT: s_or_b32 s1, s1, s2 3882; GFX9-NEXT: ; return to shader part epilog 3883; 3884; GFX10-LABEL: s_fshr_v4i16: 3885; GFX10: ; %bb.0: 3886; GFX10-NEXT: s_mov_b32 s7, 0x10001 3887; GFX10-NEXT: s_lshr_b32 s8, s0, 16 3888; GFX10-NEXT: s_mov_b32 s6, 0xf000f 3889; GFX10-NEXT: s_lshl_b32 s0, s0, s7 3890; GFX10-NEXT: s_lshl_b32 s8, s8, 1 3891; GFX10-NEXT: s_and_b32 s9, s4, s6 3892; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s8 3893; GFX10-NEXT: s_andn2_b32 s4, s6, s4 3894; GFX10-NEXT: s_lshr_b32 s8, s0, 16 3895; GFX10-NEXT: s_lshr_b32 s10, s4, 16 3896; GFX10-NEXT: s_lshl_b32 s0, s0, s4 3897; GFX10-NEXT: s_lshl_b32 s4, s8, s10 3898; GFX10-NEXT: s_mov_b32 s8, 0xffff 3899; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s4 3900; GFX10-NEXT: s_lshr_b32 s4, s1, 16 3901; GFX10-NEXT: s_lshl_b32 s1, s1, s7 3902; GFX10-NEXT: s_lshl_b32 s4, s4, 1 3903; GFX10-NEXT: s_and_b32 s7, s5, s6 3904; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s4 3905; GFX10-NEXT: s_andn2_b32 s4, s6, s5 3906; GFX10-NEXT: s_lshr_b32 s5, s1, 16 3907; GFX10-NEXT: s_lshr_b32 s6, s4, 16 3908; GFX10-NEXT: s_lshr_b32 s10, s2, 16 3909; GFX10-NEXT: s_and_b32 s2, s2, s8 3910; GFX10-NEXT: s_lshr_b32 s11, s9, 16 3911; GFX10-NEXT: s_lshl_b32 s1, s1, s4 3912; GFX10-NEXT: s_lshl_b32 s4, s5, s6 3913; GFX10-NEXT: s_lshr_b32 s5, s3, 16 3914; GFX10-NEXT: s_and_b32 s3, s3, s8 3915; GFX10-NEXT: s_lshr_b32 s6, s7, 16 3916; GFX10-NEXT: s_lshr_b32 s2, s2, s9 3917; GFX10-NEXT: s_lshr_b32 s9, s10, s11 3918; GFX10-NEXT: s_lshr_b32 s3, s3, s7 3919; GFX10-NEXT: s_lshr_b32 s5, s5, s6 3920; GFX10-NEXT: s_pack_ll_b32_b16 s2, s2, s9 3921; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s4 3922; GFX10-NEXT: s_pack_ll_b32_b16 s3, s3, s5 3923; GFX10-NEXT: s_or_b32 s0, s0, s2 3924; GFX10-NEXT: s_or_b32 s1, s1, s3 3925; GFX10-NEXT: ; return to shader part epilog 3926 %result = call <4 x i16> @llvm.fshr.v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt) 3927 %cast.result = bitcast <4 x i16> %result to <2 x i32> 3928 ret <2 x i32> 
; Layout note: the physical line below starts with the last tokens of
; s_fshr_v4i16 (the operand of its ret and the closing brace) and then opens
; the test v_fshr_v4i16.
;
; v_fshr_v4i16: llvm.fshr.v4i16 with three ordinary <4 x i16> arguments in a
; function without the amdgpu_ps calling convention; the result is bitcast to
; <4 x half>. Its expected output starts with an s_waitcnt and ends with
; s_setpc_b64 s[30:31] instead of the shader-part epilog comment.
%cast.result 3929} 3930 3931define <4 x half> @v_fshr_v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt) { 3932; GFX6-LABEL: v_fshr_v4i16: 3933; GFX6: ; %bb.0: 3934; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3935; GFX6-NEXT: v_mov_b32_e32 v12, 0xffff 3936; GFX6-NEXT: v_lshlrev_b32_e32 v9, 16, v9 3937; GFX6-NEXT: v_and_b32_e32 v8, v8, v12 3938; GFX6-NEXT: v_or_b32_e32 v8, v9, v8 3939; GFX6-NEXT: v_lshlrev_b32_e32 v9, 16, v11 3940; GFX6-NEXT: v_and_b32_e32 v10, v10, v12 3941; GFX6-NEXT: v_or_b32_e32 v9, v9, v10 3942; GFX6-NEXT: s_bfe_u32 s4, 1, 0x100000 3943; GFX6-NEXT: v_bfe_u32 v10, v4, 1, 15 3944; GFX6-NEXT: s_bfe_u32 s5, 14, 0x100000 3945; GFX6-NEXT: v_lshlrev_b32_e32 v0, s4, v0 3946; GFX6-NEXT: v_lshrrev_b32_e32 v10, s5, v10 3947; GFX6-NEXT: v_or_b32_e32 v0, v0, v10 3948; GFX6-NEXT: v_bfe_u32 v10, v5, 1, 15 3949; GFX6-NEXT: v_lshlrev_b32_e32 v1, s4, v1 3950; GFX6-NEXT: v_lshrrev_b32_e32 v10, s5, v10 3951; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8 3952; GFX6-NEXT: v_or_b32_e32 v1, v1, v10 3953; GFX6-NEXT: v_lshrrev_b32_e32 v10, 16, v8 3954; GFX6-NEXT: v_and_b32_e32 v11, 15, v8 3955; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8 3956; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v4 3957; GFX6-NEXT: v_and_b32_e32 v8, 15, v8 3958; GFX6-NEXT: v_bfe_u32 v11, v11, 0, 16 3959; GFX6-NEXT: v_bfe_u32 v4, v4, 1, 15 3960; GFX6-NEXT: v_bfe_u32 v8, v8, 0, 16 3961; GFX6-NEXT: v_lshlrev_b32_e32 v0, v11, v0 3962; GFX6-NEXT: v_lshrrev_b32_e32 v4, v8, v4 3963; GFX6-NEXT: v_or_b32_e32 v0, v0, v4 3964; GFX6-NEXT: v_and_b32_e32 v4, 15, v10 3965; GFX6-NEXT: v_xor_b32_e32 v8, -1, v10 3966; GFX6-NEXT: v_lshlrev_b32_e32 v5, 1, v5 3967; GFX6-NEXT: v_and_b32_e32 v8, 15, v8 3968; GFX6-NEXT: v_bfe_u32 v4, v4, 0, 16 3969; GFX6-NEXT: v_lshlrev_b32_e32 v1, v4, v1 3970; GFX6-NEXT: v_bfe_u32 v4, v5, 1, 15 3971; GFX6-NEXT: v_bfe_u32 v5, v8, 0, 16 3972; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4 3973; GFX6-NEXT: v_or_b32_e32 v1, v1, v4 3974; GFX6-NEXT: v_bfe_u32 v4, v6, 1, 15 3975; GFX6-NEXT: v_lshlrev_b32_e32 v2, 
s4, v2 3976; GFX6-NEXT: v_lshrrev_b32_e32 v4, s5, v4 3977; GFX6-NEXT: v_or_b32_e32 v2, v2, v4 3978; GFX6-NEXT: v_bfe_u32 v4, v7, 1, 15 3979; GFX6-NEXT: v_lshlrev_b32_e32 v3, s4, v3 3980; GFX6-NEXT: v_lshrrev_b32_e32 v4, s5, v4 3981; GFX6-NEXT: v_or_b32_e32 v3, v3, v4 3982; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v6 3983; GFX6-NEXT: v_xor_b32_e32 v6, -1, v9 3984; GFX6-NEXT: v_lshlrev_b32_e32 v5, 1, v7 3985; GFX6-NEXT: v_lshrrev_b32_e32 v7, 16, v6 3986; GFX6-NEXT: v_and_b32_e32 v8, 15, v6 3987; GFX6-NEXT: v_xor_b32_e32 v6, -1, v6 3988; GFX6-NEXT: v_and_b32_e32 v6, 15, v6 3989; GFX6-NEXT: v_bfe_u32 v8, v8, 0, 16 3990; GFX6-NEXT: v_bfe_u32 v4, v4, 1, 15 3991; GFX6-NEXT: v_bfe_u32 v6, v6, 0, 16 3992; GFX6-NEXT: v_lshlrev_b32_e32 v2, v8, v2 3993; GFX6-NEXT: v_lshrrev_b32_e32 v4, v6, v4 3994; GFX6-NEXT: v_or_b32_e32 v2, v2, v4 3995; GFX6-NEXT: v_and_b32_e32 v4, 15, v7 3996; GFX6-NEXT: v_xor_b32_e32 v6, -1, v7 3997; GFX6-NEXT: v_and_b32_e32 v6, 15, v6 3998; GFX6-NEXT: v_bfe_u32 v4, v4, 0, 16 3999; GFX6-NEXT: v_lshlrev_b32_e32 v3, v4, v3 4000; GFX6-NEXT: v_bfe_u32 v4, v5, 1, 15 4001; GFX6-NEXT: v_bfe_u32 v5, v6, 0, 16 4002; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4 4003; GFX6-NEXT: v_or_b32_e32 v3, v3, v4 4004; GFX6-NEXT: s_setpc_b64 s[30:31] 4005; 4006; GFX8-LABEL: v_fshr_v4i16: 4007; GFX8: ; %bb.0: 4008; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4009; GFX8-NEXT: v_lshlrev_b16_e32 v6, 1, v0 4010; GFX8-NEXT: v_lshrrev_b16_e32 v7, 15, v2 4011; GFX8-NEXT: v_or_b32_e32 v6, v6, v7 4012; GFX8-NEXT: v_mov_b32_e32 v7, 1 4013; GFX8-NEXT: v_mov_b32_e32 v8, 15 4014; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4015; GFX8-NEXT: v_lshrrev_b16_sdwa v9, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4016; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4 4017; GFX8-NEXT: v_or_b32_e32 v0, v0, v9 4018; GFX8-NEXT: v_lshlrev_b16_e32 v9, 1, v2 4019; GFX8-NEXT: v_lshlrev_b16_sdwa v2, v7, v2 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4020; GFX8-NEXT: v_lshrrev_b32_e32 v7, 16, v4 4021; GFX8-NEXT: v_and_b32_e32 v10, 15, v4 4022; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4 4023; GFX8-NEXT: v_and_b32_e32 v4, 15, v4 4024; GFX8-NEXT: v_lshrrev_b16_e32 v9, 1, v9 4025; GFX8-NEXT: v_lshlrev_b16_e32 v6, v10, v6 4026; GFX8-NEXT: v_lshrrev_b16_e32 v4, v4, v9 4027; GFX8-NEXT: v_or_b32_e32 v4, v6, v4 4028; GFX8-NEXT: v_and_b32_e32 v6, 15, v7 4029; GFX8-NEXT: v_xor_b32_e32 v7, -1, v7 4030; GFX8-NEXT: v_and_b32_e32 v7, 15, v7 4031; GFX8-NEXT: v_lshrrev_b16_e32 v2, 1, v2 4032; GFX8-NEXT: v_lshlrev_b16_e32 v0, v6, v0 4033; GFX8-NEXT: v_lshrrev_b16_e32 v2, v7, v2 4034; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 4035; GFX8-NEXT: v_mov_b32_e32 v2, 16 4036; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 4037; GFX8-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 4038; GFX8-NEXT: v_lshlrev_b16_e32 v4, 1, v1 4039; GFX8-NEXT: v_lshrrev_b16_e32 v6, 15, v3 4040; GFX8-NEXT: v_or_b32_e32 v4, v4, v6 4041; GFX8-NEXT: v_mov_b32_e32 v6, 1 4042; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4043; GFX8-NEXT: v_lshrrev_b16_sdwa v7, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4044; GFX8-NEXT: v_xor_b32_e32 v5, -1, v5 4045; GFX8-NEXT: v_or_b32_e32 v1, v1, v7 4046; GFX8-NEXT: v_lshlrev_b16_e32 v7, 1, v3 4047; GFX8-NEXT: v_lshlrev_b16_sdwa v3, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4048; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v5 4049; GFX8-NEXT: v_and_b32_e32 v8, 15, v5 4050; GFX8-NEXT: v_xor_b32_e32 v5, -1, v5 4051; GFX8-NEXT: v_and_b32_e32 v5, 15, v5 4052; GFX8-NEXT: v_lshrrev_b16_e32 v7, 1, v7 4053; GFX8-NEXT: v_lshlrev_b16_e32 v4, v8, v4 4054; GFX8-NEXT: v_lshrrev_b16_e32 v5, v5, v7 4055; GFX8-NEXT: v_or_b32_e32 v4, v4, v5 4056; GFX8-NEXT: v_and_b32_e32 v5, 15, 
v6 4057; GFX8-NEXT: v_xor_b32_e32 v6, -1, v6 4058; GFX8-NEXT: v_and_b32_e32 v6, 15, v6 4059; GFX8-NEXT: v_lshrrev_b16_e32 v3, 1, v3 4060; GFX8-NEXT: v_lshlrev_b16_e32 v1, v5, v1 4061; GFX8-NEXT: v_lshrrev_b16_e32 v3, v6, v3 4062; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 4063; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 4064; GFX8-NEXT: v_or_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 4065; GFX8-NEXT: s_setpc_b64 s[30:31] 4066; 4067; GFX9-LABEL: v_fshr_v4i16: 4068; GFX9: ; %bb.0: 4069; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4070; GFX9-NEXT: s_mov_b32 s4, 0xf000f 4071; GFX9-NEXT: v_and_b32_e32 v6, s4, v4 4072; GFX9-NEXT: v_xor_b32_e32 v4, -1, v4 4073; GFX9-NEXT: v_and_b32_e32 v4, s4, v4 4074; GFX9-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] 4075; GFX9-NEXT: v_pk_lshlrev_b16 v0, v4, v0 4076; GFX9-NEXT: v_pk_lshrrev_b16 v2, v6, v2 4077; GFX9-NEXT: v_xor_b32_e32 v4, -1, v5 4078; GFX9-NEXT: v_or_b32_e32 v0, v0, v2 4079; GFX9-NEXT: v_and_b32_e32 v2, s4, v5 4080; GFX9-NEXT: v_and_b32_e32 v4, s4, v4 4081; GFX9-NEXT: v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1] 4082; GFX9-NEXT: v_pk_lshlrev_b16 v1, v4, v1 4083; GFX9-NEXT: v_pk_lshrrev_b16 v2, v2, v3 4084; GFX9-NEXT: v_or_b32_e32 v1, v1, v2 4085; GFX9-NEXT: s_setpc_b64 s[30:31] 4086; 4087; GFX10-LABEL: v_fshr_v4i16: 4088; GFX10: ; %bb.0: 4089; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4090; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 4091; GFX10-NEXT: v_xor_b32_e32 v6, -1, v4 4092; GFX10-NEXT: v_xor_b32_e32 v7, -1, v5 4093; GFX10-NEXT: s_mov_b32 s4, 0xf000f 4094; GFX10-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] 4095; GFX10-NEXT: v_and_b32_e32 v4, s4, v4 4096; GFX10-NEXT: v_and_b32_e32 v6, s4, v6 4097; GFX10-NEXT: v_and_b32_e32 v5, s4, v5 4098; GFX10-NEXT: v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1] 4099; GFX10-NEXT: v_and_b32_e32 v7, s4, v7 4100; GFX10-NEXT: v_pk_lshrrev_b16 v2, v4, v2 4101; GFX10-NEXT: 
; Layout note: the physical line below first finishes v_fshr_v4i16 (the rest
; of its generated gfx1010 expectations and its IR body: llvm.fshr.v4i16 call,
; bitcast to <4 x half>, ret) and then opens the test s_fshr_i64.
;
; s_fshr_i64: amdgpu_ps test of llvm.fshr.i64 with all three i64 operands
; marked inreg and a variable shift amount. The expected sequence on every
; target computes (amt & 63) and (63 & ~amt), shifts %lhs left by 1 and then
; by the inverted amount, shifts %rhs right by the masked amount, and ORs the
; two 64-bit halves together; gfx1010 differs only in instruction order and
; register choice.
v_pk_lshlrev_b16 v0, v6, v0 4102; GFX10-NEXT: v_pk_lshrrev_b16 v3, v5, v3 4103; GFX10-NEXT: v_pk_lshlrev_b16 v1, v7, v1 4104; GFX10-NEXT: v_or_b32_e32 v0, v0, v2 4105; GFX10-NEXT: v_or_b32_e32 v1, v1, v3 4106; GFX10-NEXT: s_setpc_b64 s[30:31] 4107 %result = call <4 x i16> @llvm.fshr.v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt) 4108 %cast.result = bitcast <4 x i16> %result to <4 x half> 4109 ret <4 x half> %cast.result 4110} 4111 4112define amdgpu_ps i64 @s_fshr_i64(i64 inreg %lhs, i64 inreg %rhs, i64 inreg %amt) { 4113; GFX6-LABEL: s_fshr_i64: 4114; GFX6: ; %bb.0: 4115; GFX6-NEXT: s_and_b64 s[6:7], s[4:5], 63 4116; GFX6-NEXT: s_andn2_b64 s[4:5], 63, s[4:5] 4117; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 4118; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s4 4119; GFX6-NEXT: s_lshr_b64 s[2:3], s[2:3], s6 4120; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 4121; GFX6-NEXT: ; return to shader part epilog 4122; 4123; GFX8-LABEL: s_fshr_i64: 4124; GFX8: ; %bb.0: 4125; GFX8-NEXT: s_and_b64 s[6:7], s[4:5], 63 4126; GFX8-NEXT: s_andn2_b64 s[4:5], 63, s[4:5] 4127; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 4128; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s4 4129; GFX8-NEXT: s_lshr_b64 s[2:3], s[2:3], s6 4130; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 4131; GFX8-NEXT: ; return to shader part epilog 4132; 4133; GFX9-LABEL: s_fshr_i64: 4134; GFX9: ; %bb.0: 4135; GFX9-NEXT: s_and_b64 s[6:7], s[4:5], 63 4136; GFX9-NEXT: s_andn2_b64 s[4:5], 63, s[4:5] 4137; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 4138; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s4 4139; GFX9-NEXT: s_lshr_b64 s[2:3], s[2:3], s6 4140; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 4141; GFX9-NEXT: ; return to shader part epilog 4142; 4143; GFX10-LABEL: s_fshr_i64: 4144; GFX10: ; %bb.0: 4145; GFX10-NEXT: s_andn2_b64 s[6:7], 63, s[4:5] 4146; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 4147; GFX10-NEXT: s_and_b64 s[4:5], s[4:5], 63 4148; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s6 4149; GFX10-NEXT: s_lshr_b64 s[2:3], s[2:3], s4 4150; 
GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 4151; GFX10-NEXT: ; return to shader part epilog 4152 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt) 4153 ret i64 %result 4154} 4155 4156define amdgpu_ps i64 @s_fshr_i64_5(i64 inreg %lhs, i64 inreg %rhs) { 4157; GCN-LABEL: s_fshr_i64_5: 4158; GCN: ; %bb.0: 4159; GCN-NEXT: s_lshl_b32 s1, s0, 27 4160; GCN-NEXT: s_mov_b32 s0, 0 4161; GCN-NEXT: s_lshr_b64 s[2:3], s[2:3], 5 4162; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 4163; GCN-NEXT: ; return to shader part epilog 4164 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 5) 4165 ret i64 %result 4166} 4167 4168define amdgpu_ps i64 @s_fshr_i64_32(i64 inreg %lhs, i64 inreg %rhs) { 4169; GCN-LABEL: s_fshr_i64_32: 4170; GCN: ; %bb.0: 4171; GCN-NEXT: s_mov_b32 s1, s0 4172; GCN-NEXT: s_mov_b32 s0, 0 4173; GCN-NEXT: s_mov_b32 s2, s3 4174; GCN-NEXT: s_mov_b32 s3, s0 4175; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 4176; GCN-NEXT: ; return to shader part epilog 4177 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 32) 4178 ret i64 %result 4179} 4180 4181define amdgpu_ps i64 @s_fshr_i64_48(i64 inreg %lhs, i64 inreg %rhs) { 4182; GCN-LABEL: s_fshr_i64_48: 4183; GCN: ; %bb.0: 4184; GCN-NEXT: s_lshl_b64 s[0:1], s[0:1], 16 4185; GCN-NEXT: s_lshr_b32 s2, s3, 16 4186; GCN-NEXT: s_mov_b32 s3, 0 4187; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 4188; GCN-NEXT: ; return to shader part epilog 4189 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 48) 4190 ret i64 %result 4191} 4192 4193define i64 @v_fshr_i64(i64 %lhs, i64 %rhs, i64 %amt) { 4194; GFX6-LABEL: v_fshr_i64: 4195; GFX6: ; %bb.0: 4196; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4197; GFX6-NEXT: v_and_b32_e32 v5, 63, v4 4198; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4 4199; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 1 4200; GFX6-NEXT: v_and_b32_e32 v4, 63, v4 4201; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v4 4202; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], v5 4203; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 4204; 
GFX6-NEXT: v_or_b32_e32 v1, v1, v3 4205; GFX6-NEXT: s_setpc_b64 s[30:31] 4206; 4207; GFX8-LABEL: v_fshr_i64: 4208; GFX8: ; %bb.0: 4209; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4210; GFX8-NEXT: v_and_b32_e32 v5, 63, v4 4211; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4 4212; GFX8-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 4213; GFX8-NEXT: v_and_b32_e32 v4, 63, v4 4214; GFX8-NEXT: v_lshlrev_b64 v[0:1], v4, v[0:1] 4215; GFX8-NEXT: v_lshrrev_b64 v[2:3], v5, v[2:3] 4216; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 4217; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 4218; GFX8-NEXT: s_setpc_b64 s[30:31] 4219; 4220; GFX9-LABEL: v_fshr_i64: 4221; GFX9: ; %bb.0: 4222; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4223; GFX9-NEXT: v_and_b32_e32 v5, 63, v4 4224; GFX9-NEXT: v_xor_b32_e32 v4, -1, v4 4225; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 4226; GFX9-NEXT: v_and_b32_e32 v4, 63, v4 4227; GFX9-NEXT: v_lshlrev_b64 v[0:1], v4, v[0:1] 4228; GFX9-NEXT: v_lshrrev_b64 v[2:3], v5, v[2:3] 4229; GFX9-NEXT: v_or_b32_e32 v0, v0, v2 4230; GFX9-NEXT: v_or_b32_e32 v1, v1, v3 4231; GFX9-NEXT: s_setpc_b64 s[30:31] 4232; 4233; GFX10-LABEL: v_fshr_i64: 4234; GFX10: ; %bb.0: 4235; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4236; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 4237; GFX10-NEXT: v_xor_b32_e32 v5, -1, v4 4238; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 4239; GFX10-NEXT: v_and_b32_e32 v4, 63, v4 4240; GFX10-NEXT: v_and_b32_e32 v5, 63, v5 4241; GFX10-NEXT: v_lshrrev_b64 v[2:3], v4, v[2:3] 4242; GFX10-NEXT: v_lshlrev_b64 v[0:1], v5, v[0:1] 4243; GFX10-NEXT: v_or_b32_e32 v0, v0, v2 4244; GFX10-NEXT: v_or_b32_e32 v1, v1, v3 4245; GFX10-NEXT: s_setpc_b64 s[30:31] 4246 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt) 4247 ret i64 %result 4248} 4249 4250define i64 @v_fshr_i64_5(i64 %lhs, i64 %rhs) { 4251; GFX6-LABEL: v_fshr_i64_5: 4252; GFX6: ; %bb.0: 4253; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4254; GFX6-NEXT: v_mov_b32_e32 v4, v0 4255; GFX6-NEXT: v_lshr_b64 v[0:1], v[2:3], 
5 4256; GFX6-NEXT: v_lshlrev_b32_e32 v2, 27, v4 4257; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 4258; GFX6-NEXT: s_setpc_b64 s[30:31] 4259; 4260; GFX8-LABEL: v_fshr_i64_5: 4261; GFX8: ; %bb.0: 4262; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4263; GFX8-NEXT: v_mov_b32_e32 v4, v0 4264; GFX8-NEXT: v_lshrrev_b64 v[0:1], 5, v[2:3] 4265; GFX8-NEXT: v_lshlrev_b32_e32 v2, 27, v4 4266; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 4267; GFX8-NEXT: s_setpc_b64 s[30:31] 4268; 4269; GFX9-LABEL: v_fshr_i64_5: 4270; GFX9: ; %bb.0: 4271; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4272; GFX9-NEXT: v_mov_b32_e32 v4, v0 4273; GFX9-NEXT: v_lshrrev_b64 v[0:1], 5, v[2:3] 4274; GFX9-NEXT: v_lshlrev_b32_e32 v2, 27, v4 4275; GFX9-NEXT: v_or_b32_e32 v1, v2, v1 4276; GFX9-NEXT: s_setpc_b64 s[30:31] 4277; 4278; GFX10-LABEL: v_fshr_i64_5: 4279; GFX10: ; %bb.0: 4280; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4281; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 4282; GFX10-NEXT: v_mov_b32_e32 v4, v0 4283; GFX10-NEXT: v_lshrrev_b64 v[0:1], 5, v[2:3] 4284; GFX10-NEXT: v_lshlrev_b32_e32 v2, 27, v4 4285; GFX10-NEXT: v_or_b32_e32 v1, v2, v1 4286; GFX10-NEXT: s_setpc_b64 s[30:31] 4287 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 5) 4288 ret i64 %result 4289} 4290 4291define i64 @v_fshr_i64_32(i64 %lhs, i64 %rhs) { 4292; GFX6-LABEL: v_fshr_i64_32: 4293; GFX6: ; %bb.0: 4294; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4295; GFX6-NEXT: v_mov_b32_e32 v1, v0 4296; GFX6-NEXT: v_mov_b32_e32 v0, v3 4297; GFX6-NEXT: s_setpc_b64 s[30:31] 4298; 4299; GFX8-LABEL: v_fshr_i64_32: 4300; GFX8: ; %bb.0: 4301; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4302; GFX8-NEXT: v_mov_b32_e32 v1, v0 4303; GFX8-NEXT: v_mov_b32_e32 v0, v3 4304; GFX8-NEXT: s_setpc_b64 s[30:31] 4305; 4306; GFX9-LABEL: v_fshr_i64_32: 4307; GFX9: ; %bb.0: 4308; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4309; GFX9-NEXT: v_mov_b32_e32 v1, v0 4310; GFX9-NEXT: v_mov_b32_e32 v0, v3 4311; GFX9-NEXT: s_setpc_b64 s[30:31] 
4312; 4313; GFX10-LABEL: v_fshr_i64_32: 4314; GFX10: ; %bb.0: 4315; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4316; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 4317; GFX10-NEXT: v_mov_b32_e32 v1, v0 4318; GFX10-NEXT: v_mov_b32_e32 v0, v3 4319; GFX10-NEXT: s_setpc_b64 s[30:31] 4320 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 32) 4321 ret i64 %result 4322} 4323 4324define i64 @v_fshr_i64_48(i64 %lhs, i64 %rhs) { 4325; GFX6-LABEL: v_fshr_i64_48: 4326; GFX6: ; %bb.0: 4327; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4328; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 16 4329; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v3 4330; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 4331; GFX6-NEXT: s_setpc_b64 s[30:31] 4332; 4333; GFX8-LABEL: v_fshr_i64_48: 4334; GFX8: ; %bb.0: 4335; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4336; GFX8-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1] 4337; GFX8-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4338; GFX8-NEXT: s_setpc_b64 s[30:31] 4339; 4340; GFX9-LABEL: v_fshr_i64_48: 4341; GFX9: ; %bb.0: 4342; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4343; GFX9-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1] 4344; GFX9-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4345; GFX9-NEXT: s_setpc_b64 s[30:31] 4346; 4347; GFX10-LABEL: v_fshr_i64_48: 4348; GFX10: ; %bb.0: 4349; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4350; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 4351; GFX10-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1] 4352; GFX10-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4353; GFX10-NEXT: s_setpc_b64 s[30:31] 4354 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 48) 4355 ret i64 %result 4356} 4357 4358define amdgpu_ps <2 x float> @v_fshr_i64_ssv(i64 inreg %lhs, i64 inreg %rhs, i64 %amt) { 4359; GFX6-LABEL: v_fshr_i64_ssv: 4360; GFX6: ; %bb.0: 4361; GFX6-NEXT: v_and_b32_e32 v2, 63, v0 
4362; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0 4363; GFX6-NEXT: v_and_b32_e32 v0, 63, v0 4364; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 4365; GFX6-NEXT: v_lshl_b64 v[0:1], s[0:1], v0 4366; GFX6-NEXT: v_lshr_b64 v[2:3], s[2:3], v2 4367; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 4368; GFX6-NEXT: v_or_b32_e32 v1, v1, v3 4369; GFX6-NEXT: ; return to shader part epilog 4370; 4371; GFX8-LABEL: v_fshr_i64_ssv: 4372; GFX8: ; %bb.0: 4373; GFX8-NEXT: v_and_b32_e32 v2, 63, v0 4374; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0 4375; GFX8-NEXT: v_and_b32_e32 v0, 63, v0 4376; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 4377; GFX8-NEXT: v_lshlrev_b64 v[0:1], v0, s[0:1] 4378; GFX8-NEXT: v_lshrrev_b64 v[2:3], v2, s[2:3] 4379; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 4380; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 4381; GFX8-NEXT: ; return to shader part epilog 4382; 4383; GFX9-LABEL: v_fshr_i64_ssv: 4384; GFX9: ; %bb.0: 4385; GFX9-NEXT: v_and_b32_e32 v2, 63, v0 4386; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0 4387; GFX9-NEXT: v_and_b32_e32 v0, 63, v0 4388; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 4389; GFX9-NEXT: v_lshlrev_b64 v[0:1], v0, s[0:1] 4390; GFX9-NEXT: v_lshrrev_b64 v[2:3], v2, s[2:3] 4391; GFX9-NEXT: v_or_b32_e32 v0, v0, v2 4392; GFX9-NEXT: v_or_b32_e32 v1, v1, v3 4393; GFX9-NEXT: ; return to shader part epilog 4394; 4395; GFX10-LABEL: v_fshr_i64_ssv: 4396; GFX10: ; %bb.0: 4397; GFX10-NEXT: v_xor_b32_e32 v1, -1, v0 4398; GFX10-NEXT: v_and_b32_e32 v0, 63, v0 4399; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 4400; GFX10-NEXT: v_and_b32_e32 v2, 63, v1 4401; GFX10-NEXT: v_lshrrev_b64 v[0:1], v0, s[2:3] 4402; GFX10-NEXT: v_lshlrev_b64 v[2:3], v2, s[0:1] 4403; GFX10-NEXT: v_or_b32_e32 v0, v2, v0 4404; GFX10-NEXT: v_or_b32_e32 v1, v3, v1 4405; GFX10-NEXT: ; return to shader part epilog 4406 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt) 4407 %cast = bitcast i64 %result to <2 x float> 4408 ret <2 x float> %cast 4409} 4410 4411define amdgpu_ps <2 x float> @v_fshr_i64_svs(i64 inreg %lhs, i64 %rhs, i64 inreg %amt) 
{ 4412; GFX6-LABEL: v_fshr_i64_svs: 4413; GFX6: ; %bb.0: 4414; GFX6-NEXT: s_and_b64 s[4:5], s[2:3], 63 4415; GFX6-NEXT: s_andn2_b64 s[2:3], 63, s[2:3] 4416; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 4417; GFX6-NEXT: v_lshr_b64 v[0:1], v[0:1], s4 4418; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 4419; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 4420; GFX6-NEXT: v_or_b32_e32 v1, s1, v1 4421; GFX6-NEXT: ; return to shader part epilog 4422; 4423; GFX8-LABEL: v_fshr_i64_svs: 4424; GFX8: ; %bb.0: 4425; GFX8-NEXT: s_and_b64 s[4:5], s[2:3], 63 4426; GFX8-NEXT: s_andn2_b64 s[2:3], 63, s[2:3] 4427; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 4428; GFX8-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1] 4429; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 4430; GFX8-NEXT: v_or_b32_e32 v0, s0, v0 4431; GFX8-NEXT: v_or_b32_e32 v1, s1, v1 4432; GFX8-NEXT: ; return to shader part epilog 4433; 4434; GFX9-LABEL: v_fshr_i64_svs: 4435; GFX9: ; %bb.0: 4436; GFX9-NEXT: s_and_b64 s[4:5], s[2:3], 63 4437; GFX9-NEXT: s_andn2_b64 s[2:3], 63, s[2:3] 4438; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 4439; GFX9-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1] 4440; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 4441; GFX9-NEXT: v_or_b32_e32 v0, s0, v0 4442; GFX9-NEXT: v_or_b32_e32 v1, s1, v1 4443; GFX9-NEXT: ; return to shader part epilog 4444; 4445; GFX10-LABEL: v_fshr_i64_svs: 4446; GFX10: ; %bb.0: 4447; GFX10-NEXT: s_and_b64 s[4:5], s[2:3], 63 4448; GFX10-NEXT: s_andn2_b64 s[2:3], 63, s[2:3] 4449; GFX10-NEXT: v_lshrrev_b64 v[0:1], s4, v[0:1] 4450; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 4451; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 4452; GFX10-NEXT: v_or_b32_e32 v0, s0, v0 4453; GFX10-NEXT: v_or_b32_e32 v1, s1, v1 4454; GFX10-NEXT: ; return to shader part epilog 4455 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt) 4456 %cast = bitcast i64 %result to <2 x float> 4457 ret <2 x float> %cast 4458} 4459 4460define amdgpu_ps <2 x float> @v_fshr_i64_vss(i64 %lhs, i64 inreg %rhs, i64 inreg %amt) { 4461; GFX6-LABEL: v_fshr_i64_vss: 
4462; GFX6: ; %bb.0: 4463; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 1 4464; GFX6-NEXT: s_and_b64 s[4:5], s[2:3], 63 4465; GFX6-NEXT: s_andn2_b64 s[2:3], 63, s[2:3] 4466; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], s2 4467; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], s4 4468; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 4469; GFX6-NEXT: v_or_b32_e32 v1, s1, v1 4470; GFX6-NEXT: ; return to shader part epilog 4471; 4472; GFX8-LABEL: v_fshr_i64_vss: 4473; GFX8: ; %bb.0: 4474; GFX8-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 4475; GFX8-NEXT: s_and_b64 s[4:5], s[2:3], 63 4476; GFX8-NEXT: s_andn2_b64 s[2:3], 63, s[2:3] 4477; GFX8-NEXT: v_lshlrev_b64 v[0:1], s2, v[0:1] 4478; GFX8-NEXT: s_lshr_b64 s[0:1], s[0:1], s4 4479; GFX8-NEXT: v_or_b32_e32 v0, s0, v0 4480; GFX8-NEXT: v_or_b32_e32 v1, s1, v1 4481; GFX8-NEXT: ; return to shader part epilog 4482; 4483; GFX9-LABEL: v_fshr_i64_vss: 4484; GFX9: ; %bb.0: 4485; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 4486; GFX9-NEXT: s_and_b64 s[4:5], s[2:3], 63 4487; GFX9-NEXT: s_andn2_b64 s[2:3], 63, s[2:3] 4488; GFX9-NEXT: v_lshlrev_b64 v[0:1], s2, v[0:1] 4489; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], s4 4490; GFX9-NEXT: v_or_b32_e32 v0, s0, v0 4491; GFX9-NEXT: v_or_b32_e32 v1, s1, v1 4492; GFX9-NEXT: ; return to shader part epilog 4493; 4494; GFX10-LABEL: v_fshr_i64_vss: 4495; GFX10: ; %bb.0: 4496; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 4497; GFX10-NEXT: s_andn2_b64 s[4:5], 63, s[2:3] 4498; GFX10-NEXT: s_and_b64 s[2:3], s[2:3], 63 4499; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 4500; GFX10-NEXT: v_lshlrev_b64 v[0:1], s4, v[0:1] 4501; GFX10-NEXT: v_or_b32_e32 v0, s0, v0 4502; GFX10-NEXT: v_or_b32_e32 v1, s1, v1 4503; GFX10-NEXT: ; return to shader part epilog 4504 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt) 4505 %cast = bitcast i64 %result to <2 x float> 4506 ret <2 x float> %cast 4507} 4508 4509define amdgpu_ps <2 x i64> @s_fshr_v2i64(<2 x i64> inreg %lhs, <2 x i64> inreg %rhs, <2 x i64> inreg %amt) { 4510; GFX6-LABEL: s_fshr_v2i64: 4511; GFX6: 
; %bb.0: 4512; GFX6-NEXT: s_and_b64 s[12:13], s[8:9], 63 4513; GFX6-NEXT: s_andn2_b64 s[8:9], 63, s[8:9] 4514; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 4515; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s8 4516; GFX6-NEXT: s_lshr_b64 s[4:5], s[4:5], s12 4517; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] 4518; GFX6-NEXT: s_and_b64 s[4:5], s[10:11], 63 4519; GFX6-NEXT: s_andn2_b64 s[8:9], 63, s[10:11] 4520; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 4521; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], s8 4522; GFX6-NEXT: s_lshr_b64 s[4:5], s[6:7], s4 4523; GFX6-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] 4524; GFX6-NEXT: ; return to shader part epilog 4525; 4526; GFX8-LABEL: s_fshr_v2i64: 4527; GFX8: ; %bb.0: 4528; GFX8-NEXT: s_and_b64 s[12:13], s[8:9], 63 4529; GFX8-NEXT: s_andn2_b64 s[8:9], 63, s[8:9] 4530; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 4531; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s8 4532; GFX8-NEXT: s_lshr_b64 s[4:5], s[4:5], s12 4533; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] 4534; GFX8-NEXT: s_and_b64 s[4:5], s[10:11], 63 4535; GFX8-NEXT: s_andn2_b64 s[8:9], 63, s[10:11] 4536; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 4537; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], s8 4538; GFX8-NEXT: s_lshr_b64 s[4:5], s[6:7], s4 4539; GFX8-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] 4540; GFX8-NEXT: ; return to shader part epilog 4541; 4542; GFX9-LABEL: s_fshr_v2i64: 4543; GFX9: ; %bb.0: 4544; GFX9-NEXT: s_and_b64 s[12:13], s[8:9], 63 4545; GFX9-NEXT: s_andn2_b64 s[8:9], 63, s[8:9] 4546; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 4547; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s8 4548; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], s12 4549; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] 4550; GFX9-NEXT: s_and_b64 s[4:5], s[10:11], 63 4551; GFX9-NEXT: s_andn2_b64 s[8:9], 63, s[10:11] 4552; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 4553; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], s8 4554; GFX9-NEXT: s_lshr_b64 s[4:5], s[6:7], s4 4555; GFX9-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] 4556; GFX9-NEXT: ; return to shader part epilog 4557; 
4558; GFX10-LABEL: s_fshr_v2i64: 4559; GFX10: ; %bb.0: 4560; GFX10-NEXT: s_andn2_b64 s[12:13], 63, s[8:9] 4561; GFX10-NEXT: s_and_b64 s[8:9], s[8:9], 63 4562; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 4563; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s8 4564; GFX10-NEXT: s_andn2_b64 s[8:9], 63, s[10:11] 4565; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 4566; GFX10-NEXT: s_and_b64 s[10:11], s[10:11], 63 4567; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s12 4568; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], s8 4569; GFX10-NEXT: s_lshr_b64 s[6:7], s[6:7], s10 4570; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] 4571; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7] 4572; GFX10-NEXT: ; return to shader part epilog 4573 %result = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt) 4574 ret <2 x i64> %result 4575} 4576 4577define <2 x i64> @v_fshr_v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt) { 4578; GFX6-LABEL: v_fshr_v2i64: 4579; GFX6: ; %bb.0: 4580; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4581; GFX6-NEXT: v_and_b32_e32 v9, 63, v8 4582; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8 4583; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 1 4584; GFX6-NEXT: v_and_b32_e32 v8, 63, v8 4585; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v8 4586; GFX6-NEXT: v_lshr_b64 v[4:5], v[4:5], v9 4587; GFX6-NEXT: v_xor_b32_e32 v8, -1, v10 4588; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 4589; GFX6-NEXT: v_or_b32_e32 v0, v0, v4 4590; GFX6-NEXT: v_and_b32_e32 v4, 63, v10 4591; GFX6-NEXT: v_and_b32_e32 v8, 63, v8 4592; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], v8 4593; GFX6-NEXT: v_lshr_b64 v[6:7], v[6:7], v4 4594; GFX6-NEXT: v_or_b32_e32 v1, v1, v5 4595; GFX6-NEXT: v_or_b32_e32 v2, v2, v6 4596; GFX6-NEXT: v_or_b32_e32 v3, v3, v7 4597; GFX6-NEXT: s_setpc_b64 s[30:31] 4598; 4599; GFX8-LABEL: v_fshr_v2i64: 4600; GFX8: ; %bb.0: 4601; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4602; GFX8-NEXT: v_and_b32_e32 v9, 63, v8 4603; GFX8-NEXT: v_xor_b32_e32 v8, -1, v8 4604; GFX8-NEXT: v_lshlrev_b64 v[0:1], 1, 
v[0:1] 4605; GFX8-NEXT: v_and_b32_e32 v8, 63, v8 4606; GFX8-NEXT: v_lshlrev_b64 v[0:1], v8, v[0:1] 4607; GFX8-NEXT: v_lshrrev_b64 v[4:5], v9, v[4:5] 4608; GFX8-NEXT: v_xor_b32_e32 v8, -1, v10 4609; GFX8-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 4610; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 4611; GFX8-NEXT: v_and_b32_e32 v4, 63, v10 4612; GFX8-NEXT: v_and_b32_e32 v8, 63, v8 4613; GFX8-NEXT: v_lshlrev_b64 v[2:3], v8, v[2:3] 4614; GFX8-NEXT: v_lshrrev_b64 v[6:7], v4, v[6:7] 4615; GFX8-NEXT: v_or_b32_e32 v1, v1, v5 4616; GFX8-NEXT: v_or_b32_e32 v2, v2, v6 4617; GFX8-NEXT: v_or_b32_e32 v3, v3, v7 4618; GFX8-NEXT: s_setpc_b64 s[30:31] 4619; 4620; GFX9-LABEL: v_fshr_v2i64: 4621; GFX9: ; %bb.0: 4622; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4623; GFX9-NEXT: v_and_b32_e32 v9, 63, v8 4624; GFX9-NEXT: v_xor_b32_e32 v8, -1, v8 4625; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 4626; GFX9-NEXT: v_and_b32_e32 v8, 63, v8 4627; GFX9-NEXT: v_lshlrev_b64 v[0:1], v8, v[0:1] 4628; GFX9-NEXT: v_lshrrev_b64 v[4:5], v9, v[4:5] 4629; GFX9-NEXT: v_xor_b32_e32 v8, -1, v10 4630; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 4631; GFX9-NEXT: v_or_b32_e32 v0, v0, v4 4632; GFX9-NEXT: v_and_b32_e32 v4, 63, v10 4633; GFX9-NEXT: v_and_b32_e32 v8, 63, v8 4634; GFX9-NEXT: v_lshlrev_b64 v[2:3], v8, v[2:3] 4635; GFX9-NEXT: v_lshrrev_b64 v[6:7], v4, v[6:7] 4636; GFX9-NEXT: v_or_b32_e32 v1, v1, v5 4637; GFX9-NEXT: v_or_b32_e32 v2, v2, v6 4638; GFX9-NEXT: v_or_b32_e32 v3, v3, v7 4639; GFX9-NEXT: s_setpc_b64 s[30:31] 4640; 4641; GFX10-LABEL: v_fshr_v2i64: 4642; GFX10: ; %bb.0: 4643; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4644; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 4645; GFX10-NEXT: v_xor_b32_e32 v9, -1, v8 4646; GFX10-NEXT: v_xor_b32_e32 v11, -1, v10 4647; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 4648; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 4649; GFX10-NEXT: v_and_b32_e32 v8, 63, v8 4650; GFX10-NEXT: v_and_b32_e32 v9, 63, v9 4651; GFX10-NEXT: v_and_b32_e32 v11, 63, v11 4652; GFX10-NEXT: 
v_and_b32_e32 v10, 63, v10 4653; GFX10-NEXT: v_lshrrev_b64 v[4:5], v8, v[4:5] 4654; GFX10-NEXT: v_lshlrev_b64 v[0:1], v9, v[0:1] 4655; GFX10-NEXT: v_lshlrev_b64 v[2:3], v11, v[2:3] 4656; GFX10-NEXT: v_lshrrev_b64 v[6:7], v10, v[6:7] 4657; GFX10-NEXT: v_or_b32_e32 v0, v0, v4 4658; GFX10-NEXT: v_or_b32_e32 v1, v1, v5 4659; GFX10-NEXT: v_or_b32_e32 v2, v2, v6 4660; GFX10-NEXT: v_or_b32_e32 v3, v3, v7 4661; GFX10-NEXT: s_setpc_b64 s[30:31] 4662 %result = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt) 4663 ret <2 x i64> %result 4664} 4665 4666define amdgpu_ps i128 @s_fshr_i128(i128 inreg %lhs, i128 inreg %rhs, i128 inreg %amt) { 4667; GFX6-LABEL: s_fshr_i128: 4668; GFX6: ; %bb.0: 4669; GFX6-NEXT: s_movk_i32 s10, 0x7f 4670; GFX6-NEXT: s_mov_b32 s11, 0 4671; GFX6-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11] 4672; GFX6-NEXT: s_andn2_b64 s[8:9], s[10:11], s[8:9] 4673; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 4674; GFX6-NEXT: s_lshr_b32 s10, s1, 31 4675; GFX6-NEXT: s_lshl_b64 s[14:15], s[0:1], 1 4676; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[10:11] 4677; GFX6-NEXT: s_sub_i32 s13, s8, 64 4678; GFX6-NEXT: s_sub_i32 s9, 64, s8 4679; GFX6-NEXT: s_cmp_lt_u32 s8, 64 4680; GFX6-NEXT: s_cselect_b32 s16, 1, 0 4681; GFX6-NEXT: s_cmp_eq_u32 s8, 0 4682; GFX6-NEXT: s_cselect_b32 s17, 1, 0 4683; GFX6-NEXT: s_lshl_b64 s[2:3], s[14:15], s8 4684; GFX6-NEXT: s_lshr_b64 s[10:11], s[14:15], s9 4685; GFX6-NEXT: s_lshl_b64 s[8:9], s[0:1], s8 4686; GFX6-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9] 4687; GFX6-NEXT: s_lshl_b64 s[10:11], s[14:15], s13 4688; GFX6-NEXT: s_cmp_lg_u32 s16, 0 4689; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 4690; GFX6-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11] 4691; GFX6-NEXT: s_cmp_lg_u32 s17, 0 4692; GFX6-NEXT: s_cselect_b64 s[8:9], s[0:1], s[8:9] 4693; GFX6-NEXT: s_sub_i32 s14, s12, 64 4694; GFX6-NEXT: s_sub_i32 s13, 64, s12 4695; GFX6-NEXT: s_cmp_lt_u32 s12, 64 4696; GFX6-NEXT: s_cselect_b32 s15, 1, 0 4697; GFX6-NEXT: s_cmp_eq_u32 s12, 0 4698; 
GFX6-NEXT: s_cselect_b32 s16, 1, 0 4699; GFX6-NEXT: s_lshr_b64 s[0:1], s[6:7], s12 4700; GFX6-NEXT: s_lshr_b64 s[10:11], s[4:5], s12 4701; GFX6-NEXT: s_lshl_b64 s[12:13], s[6:7], s13 4702; GFX6-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] 4703; GFX6-NEXT: s_lshr_b64 s[6:7], s[6:7], s14 4704; GFX6-NEXT: s_cmp_lg_u32 s15, 0 4705; GFX6-NEXT: s_cselect_b64 s[6:7], s[10:11], s[6:7] 4706; GFX6-NEXT: s_cmp_lg_u32 s16, 0 4707; GFX6-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] 4708; GFX6-NEXT: s_cmp_lg_u32 s15, 0 4709; GFX6-NEXT: s_cselect_b64 s[6:7], s[0:1], 0 4710; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5] 4711; GFX6-NEXT: s_or_b64 s[2:3], s[8:9], s[6:7] 4712; GFX6-NEXT: ; return to shader part epilog 4713; 4714; GFX8-LABEL: s_fshr_i128: 4715; GFX8: ; %bb.0: 4716; GFX8-NEXT: s_movk_i32 s10, 0x7f 4717; GFX8-NEXT: s_mov_b32 s11, 0 4718; GFX8-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11] 4719; GFX8-NEXT: s_andn2_b64 s[8:9], s[10:11], s[8:9] 4720; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 4721; GFX8-NEXT: s_lshr_b32 s10, s1, 31 4722; GFX8-NEXT: s_lshl_b64 s[14:15], s[0:1], 1 4723; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[10:11] 4724; GFX8-NEXT: s_sub_i32 s13, s8, 64 4725; GFX8-NEXT: s_sub_i32 s9, 64, s8 4726; GFX8-NEXT: s_cmp_lt_u32 s8, 64 4727; GFX8-NEXT: s_cselect_b32 s16, 1, 0 4728; GFX8-NEXT: s_cmp_eq_u32 s8, 0 4729; GFX8-NEXT: s_cselect_b32 s17, 1, 0 4730; GFX8-NEXT: s_lshl_b64 s[2:3], s[14:15], s8 4731; GFX8-NEXT: s_lshr_b64 s[10:11], s[14:15], s9 4732; GFX8-NEXT: s_lshl_b64 s[8:9], s[0:1], s8 4733; GFX8-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9] 4734; GFX8-NEXT: s_lshl_b64 s[10:11], s[14:15], s13 4735; GFX8-NEXT: s_cmp_lg_u32 s16, 0 4736; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 4737; GFX8-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11] 4738; GFX8-NEXT: s_cmp_lg_u32 s17, 0 4739; GFX8-NEXT: s_cselect_b64 s[8:9], s[0:1], s[8:9] 4740; GFX8-NEXT: s_sub_i32 s14, s12, 64 4741; GFX8-NEXT: s_sub_i32 s13, 64, s12 4742; GFX8-NEXT: s_cmp_lt_u32 s12, 64 4743; GFX8-NEXT: s_cselect_b32 s15, 1, 0 
4744; GFX8-NEXT: s_cmp_eq_u32 s12, 0 4745; GFX8-NEXT: s_cselect_b32 s16, 1, 0 4746; GFX8-NEXT: s_lshr_b64 s[0:1], s[6:7], s12 4747; GFX8-NEXT: s_lshr_b64 s[10:11], s[4:5], s12 4748; GFX8-NEXT: s_lshl_b64 s[12:13], s[6:7], s13 4749; GFX8-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] 4750; GFX8-NEXT: s_lshr_b64 s[6:7], s[6:7], s14 4751; GFX8-NEXT: s_cmp_lg_u32 s15, 0 4752; GFX8-NEXT: s_cselect_b64 s[6:7], s[10:11], s[6:7] 4753; GFX8-NEXT: s_cmp_lg_u32 s16, 0 4754; GFX8-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] 4755; GFX8-NEXT: s_cmp_lg_u32 s15, 0 4756; GFX8-NEXT: s_cselect_b64 s[6:7], s[0:1], 0 4757; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5] 4758; GFX8-NEXT: s_or_b64 s[2:3], s[8:9], s[6:7] 4759; GFX8-NEXT: ; return to shader part epilog 4760; 4761; GFX9-LABEL: s_fshr_i128: 4762; GFX9: ; %bb.0: 4763; GFX9-NEXT: s_movk_i32 s10, 0x7f 4764; GFX9-NEXT: s_mov_b32 s11, 0 4765; GFX9-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11] 4766; GFX9-NEXT: s_andn2_b64 s[8:9], s[10:11], s[8:9] 4767; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 4768; GFX9-NEXT: s_lshr_b32 s10, s1, 31 4769; GFX9-NEXT: s_lshl_b64 s[14:15], s[0:1], 1 4770; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[10:11] 4771; GFX9-NEXT: s_sub_i32 s13, s8, 64 4772; GFX9-NEXT: s_sub_i32 s9, 64, s8 4773; GFX9-NEXT: s_cmp_lt_u32 s8, 64 4774; GFX9-NEXT: s_cselect_b32 s16, 1, 0 4775; GFX9-NEXT: s_cmp_eq_u32 s8, 0 4776; GFX9-NEXT: s_cselect_b32 s17, 1, 0 4777; GFX9-NEXT: s_lshl_b64 s[2:3], s[14:15], s8 4778; GFX9-NEXT: s_lshr_b64 s[10:11], s[14:15], s9 4779; GFX9-NEXT: s_lshl_b64 s[8:9], s[0:1], s8 4780; GFX9-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9] 4781; GFX9-NEXT: s_lshl_b64 s[10:11], s[14:15], s13 4782; GFX9-NEXT: s_cmp_lg_u32 s16, 0 4783; GFX9-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 4784; GFX9-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11] 4785; GFX9-NEXT: s_cmp_lg_u32 s17, 0 4786; GFX9-NEXT: s_cselect_b64 s[8:9], s[0:1], s[8:9] 4787; GFX9-NEXT: s_sub_i32 s14, s12, 64 4788; GFX9-NEXT: s_sub_i32 s13, 64, s12 4789; GFX9-NEXT: s_cmp_lt_u32 s12, 
64 4790; GFX9-NEXT: s_cselect_b32 s15, 1, 0 4791; GFX9-NEXT: s_cmp_eq_u32 s12, 0 4792; GFX9-NEXT: s_cselect_b32 s16, 1, 0 4793; GFX9-NEXT: s_lshr_b64 s[0:1], s[6:7], s12 4794; GFX9-NEXT: s_lshr_b64 s[10:11], s[4:5], s12 4795; GFX9-NEXT: s_lshl_b64 s[12:13], s[6:7], s13 4796; GFX9-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] 4797; GFX9-NEXT: s_lshr_b64 s[6:7], s[6:7], s14 4798; GFX9-NEXT: s_cmp_lg_u32 s15, 0 4799; GFX9-NEXT: s_cselect_b64 s[6:7], s[10:11], s[6:7] 4800; GFX9-NEXT: s_cmp_lg_u32 s16, 0 4801; GFX9-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] 4802; GFX9-NEXT: s_cmp_lg_u32 s15, 0 4803; GFX9-NEXT: s_cselect_b64 s[6:7], s[0:1], 0 4804; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5] 4805; GFX9-NEXT: s_or_b64 s[2:3], s[8:9], s[6:7] 4806; GFX9-NEXT: ; return to shader part epilog 4807; 4808; GFX10-LABEL: s_fshr_i128: 4809; GFX10: ; %bb.0: 4810; GFX10-NEXT: s_movk_i32 s10, 0x7f 4811; GFX10-NEXT: s_mov_b32 s11, 0 4812; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 4813; GFX10-NEXT: s_and_b64 s[12:13], s[8:9], s[10:11] 4814; GFX10-NEXT: s_andn2_b64 s[8:9], s[10:11], s[8:9] 4815; GFX10-NEXT: s_lshr_b32 s10, s1, 31 4816; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 4817; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[10:11] 4818; GFX10-NEXT: s_sub_i32 s13, s8, 64 4819; GFX10-NEXT: s_sub_i32 s9, 64, s8 4820; GFX10-NEXT: s_cmp_lt_u32 s8, 64 4821; GFX10-NEXT: s_cselect_b32 s16, 1, 0 4822; GFX10-NEXT: s_cmp_eq_u32 s8, 0 4823; GFX10-NEXT: s_cselect_b32 s17, 1, 0 4824; GFX10-NEXT: s_lshr_b64 s[10:11], s[0:1], s9 4825; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], s8 4826; GFX10-NEXT: s_lshl_b64 s[8:9], s[0:1], s8 4827; GFX10-NEXT: s_or_b64 s[10:11], s[10:11], s[14:15] 4828; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s13 4829; GFX10-NEXT: s_cmp_lg_u32 s16, 0 4830; GFX10-NEXT: s_cselect_b64 s[8:9], s[8:9], 0 4831; GFX10-NEXT: s_cselect_b64 s[0:1], s[10:11], s[0:1] 4832; GFX10-NEXT: s_cmp_lg_u32 s17, 0 4833; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] 4834; GFX10-NEXT: s_sub_i32 s14, s12, 64 4835; 
GFX10-NEXT: s_sub_i32 s10, 64, s12 4836; GFX10-NEXT: s_cmp_lt_u32 s12, 64 4837; GFX10-NEXT: s_cselect_b32 s15, 1, 0 4838; GFX10-NEXT: s_cmp_eq_u32 s12, 0 4839; GFX10-NEXT: s_cselect_b32 s16, 1, 0 4840; GFX10-NEXT: s_lshr_b64 s[0:1], s[4:5], s12 4841; GFX10-NEXT: s_lshl_b64 s[10:11], s[6:7], s10 4842; GFX10-NEXT: s_lshr_b64 s[12:13], s[6:7], s12 4843; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[10:11] 4844; GFX10-NEXT: s_lshr_b64 s[6:7], s[6:7], s14 4845; GFX10-NEXT: s_cmp_lg_u32 s15, 0 4846; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[6:7] 4847; GFX10-NEXT: s_cmp_lg_u32 s16, 0 4848; GFX10-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] 4849; GFX10-NEXT: s_cmp_lg_u32 s15, 0 4850; GFX10-NEXT: s_cselect_b64 s[4:5], s[12:13], 0 4851; GFX10-NEXT: s_or_b64 s[0:1], s[8:9], s[0:1] 4852; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] 4853; GFX10-NEXT: ; return to shader part epilog 4854 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt) 4855 ret i128 %result 4856} 4857 4858define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) { 4859; GFX6-LABEL: v_fshr_i128: 4860; GFX6: ; %bb.0: 4861; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4862; GFX6-NEXT: s_movk_i32 s4, 0x7f 4863; GFX6-NEXT: v_and_b32_e32 v14, s4, v8 4864; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8 4865; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 4866; GFX6-NEXT: v_and_b32_e32 v15, s4, v8 4867; GFX6-NEXT: v_lshl_b64 v[8:9], v[0:1], 1 4868; GFX6-NEXT: v_lshrrev_b32_e32 v0, 31, v1 4869; GFX6-NEXT: v_or_b32_e32 v2, v2, v0 4870; GFX6-NEXT: v_sub_i32_e32 v0, vcc, 64, v15 4871; GFX6-NEXT: v_lshr_b64 v[0:1], v[8:9], v0 4872; GFX6-NEXT: v_lshl_b64 v[10:11], v[2:3], v15 4873; GFX6-NEXT: v_subrev_i32_e32 v16, vcc, 64, v15 4874; GFX6-NEXT: v_lshl_b64 v[12:13], v[8:9], v15 4875; GFX6-NEXT: v_or_b32_e32 v10, v0, v10 4876; GFX6-NEXT: v_or_b32_e32 v11, v1, v11 4877; GFX6-NEXT: v_lshl_b64 v[0:1], v[8:9], v16 4878; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15 4879; GFX6-NEXT: v_cndmask_b32_e32 v12, 0, v12, vcc 4880; GFX6-NEXT: 
v_cndmask_b32_e32 v13, 0, v13, vcc 4881; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 4882; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 4883; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v15 4884; GFX6-NEXT: v_cndmask_b32_e32 v10, v0, v2, vcc 4885; GFX6-NEXT: v_cndmask_b32_e32 v11, v1, v3, vcc 4886; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 64, v14 4887; GFX6-NEXT: v_lshr_b64 v[0:1], v[4:5], v14 4888; GFX6-NEXT: v_lshl_b64 v[2:3], v[6:7], v2 4889; GFX6-NEXT: v_subrev_i32_e32 v15, vcc, 64, v14 4890; GFX6-NEXT: v_or_b32_e32 v2, v0, v2 4891; GFX6-NEXT: v_or_b32_e32 v3, v1, v3 4892; GFX6-NEXT: v_lshr_b64 v[0:1], v[6:7], v15 4893; GFX6-NEXT: v_lshr_b64 v[8:9], v[6:7], v14 4894; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14 4895; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 4896; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 4897; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14 4898; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5] 4899; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[4:5] 4900; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc 4901; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc 4902; GFX6-NEXT: v_or_b32_e32 v0, v12, v0 4903; GFX6-NEXT: v_or_b32_e32 v1, v13, v1 4904; GFX6-NEXT: v_or_b32_e32 v2, v10, v2 4905; GFX6-NEXT: v_or_b32_e32 v3, v11, v3 4906; GFX6-NEXT: s_setpc_b64 s[30:31] 4907; 4908; GFX8-LABEL: v_fshr_i128: 4909; GFX8: ; %bb.0: 4910; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4911; GFX8-NEXT: s_movk_i32 s4, 0x7f 4912; GFX8-NEXT: v_and_b32_e32 v14, s4, v8 4913; GFX8-NEXT: v_xor_b32_e32 v8, -1, v8 4914; GFX8-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 4915; GFX8-NEXT: v_and_b32_e32 v15, s4, v8 4916; GFX8-NEXT: v_lshlrev_b64 v[8:9], 1, v[0:1] 4917; GFX8-NEXT: v_lshrrev_b32_e32 v0, 31, v1 4918; GFX8-NEXT: v_or_b32_e32 v2, v2, v0 4919; GFX8-NEXT: v_sub_u32_e32 v0, vcc, 64, v15 4920; GFX8-NEXT: v_lshrrev_b64 v[0:1], v0, v[8:9] 4921; GFX8-NEXT: v_lshlrev_b64 v[10:11], v15, v[2:3] 4922; GFX8-NEXT: v_subrev_u32_e32 v16, vcc, 64, v15 4923; GFX8-NEXT: v_lshlrev_b64 v[12:13], v15, v[8:9] 
4924; GFX8-NEXT: v_or_b32_e32 v10, v0, v10 4925; GFX8-NEXT: v_or_b32_e32 v11, v1, v11 4926; GFX8-NEXT: v_lshlrev_b64 v[0:1], v16, v[8:9] 4927; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15 4928; GFX8-NEXT: v_cndmask_b32_e32 v12, 0, v12, vcc 4929; GFX8-NEXT: v_cndmask_b32_e32 v13, 0, v13, vcc 4930; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 4931; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 4932; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v15 4933; GFX8-NEXT: v_cndmask_b32_e32 v10, v0, v2, vcc 4934; GFX8-NEXT: v_cndmask_b32_e32 v11, v1, v3, vcc 4935; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 64, v14 4936; GFX8-NEXT: v_lshrrev_b64 v[0:1], v14, v[4:5] 4937; GFX8-NEXT: v_lshlrev_b64 v[2:3], v2, v[6:7] 4938; GFX8-NEXT: v_subrev_u32_e32 v15, vcc, 64, v14 4939; GFX8-NEXT: v_or_b32_e32 v2, v0, v2 4940; GFX8-NEXT: v_or_b32_e32 v3, v1, v3 4941; GFX8-NEXT: v_lshrrev_b64 v[0:1], v15, v[6:7] 4942; GFX8-NEXT: v_lshrrev_b64 v[8:9], v14, v[6:7] 4943; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14 4944; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 4945; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 4946; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14 4947; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5] 4948; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[4:5] 4949; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc 4950; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc 4951; GFX8-NEXT: v_or_b32_e32 v0, v12, v0 4952; GFX8-NEXT: v_or_b32_e32 v1, v13, v1 4953; GFX8-NEXT: v_or_b32_e32 v2, v10, v2 4954; GFX8-NEXT: v_or_b32_e32 v3, v11, v3 4955; GFX8-NEXT: s_setpc_b64 s[30:31] 4956; 4957; GFX9-LABEL: v_fshr_i128: 4958; GFX9: ; %bb.0: 4959; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4960; GFX9-NEXT: s_movk_i32 s4, 0x7f 4961; GFX9-NEXT: v_and_b32_e32 v14, s4, v8 4962; GFX9-NEXT: v_xor_b32_e32 v8, -1, v8 4963; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 4964; GFX9-NEXT: v_and_b32_e32 v15, s4, v8 4965; GFX9-NEXT: v_lshlrev_b64 v[8:9], 1, v[0:1] 4966; GFX9-NEXT: v_lshrrev_b32_e32 v0, 31, v1 4967; GFX9-NEXT: v_or_b32_e32 v2, 
v2, v0 4968; GFX9-NEXT: v_sub_u32_e32 v0, 64, v15 4969; GFX9-NEXT: v_lshrrev_b64 v[0:1], v0, v[8:9] 4970; GFX9-NEXT: v_lshlrev_b64 v[10:11], v15, v[2:3] 4971; GFX9-NEXT: v_subrev_u32_e32 v16, 64, v15 4972; GFX9-NEXT: v_lshlrev_b64 v[12:13], v15, v[8:9] 4973; GFX9-NEXT: v_or_b32_e32 v10, v0, v10 4974; GFX9-NEXT: v_or_b32_e32 v11, v1, v11 4975; GFX9-NEXT: v_lshlrev_b64 v[0:1], v16, v[8:9] 4976; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15 4977; GFX9-NEXT: v_cndmask_b32_e32 v12, 0, v12, vcc 4978; GFX9-NEXT: v_cndmask_b32_e32 v13, 0, v13, vcc 4979; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v10, vcc 4980; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v11, vcc 4981; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v15 4982; GFX9-NEXT: v_cndmask_b32_e32 v10, v0, v2, vcc 4983; GFX9-NEXT: v_sub_u32_e32 v2, 64, v14 4984; GFX9-NEXT: v_cndmask_b32_e32 v11, v1, v3, vcc 4985; GFX9-NEXT: v_lshrrev_b64 v[0:1], v14, v[4:5] 4986; GFX9-NEXT: v_lshlrev_b64 v[2:3], v2, v[6:7] 4987; GFX9-NEXT: v_subrev_u32_e32 v15, 64, v14 4988; GFX9-NEXT: v_or_b32_e32 v2, v0, v2 4989; GFX9-NEXT: v_or_b32_e32 v3, v1, v3 4990; GFX9-NEXT: v_lshrrev_b64 v[0:1], v15, v[6:7] 4991; GFX9-NEXT: v_lshrrev_b64 v[8:9], v14, v[6:7] 4992; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14 4993; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 4994; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 4995; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14 4996; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5] 4997; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[4:5] 4998; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc 4999; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc 5000; GFX9-NEXT: v_or_b32_e32 v0, v12, v0 5001; GFX9-NEXT: v_or_b32_e32 v1, v13, v1 5002; GFX9-NEXT: v_or_b32_e32 v2, v10, v2 5003; GFX9-NEXT: v_or_b32_e32 v3, v11, v3 5004; GFX9-NEXT: s_setpc_b64 s[30:31] 5005; 5006; GFX10-LABEL: v_fshr_i128: 5007; GFX10: ; %bb.0: 5008; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5009; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 5010; GFX10-NEXT: v_xor_b32_e32 v9, -1, v8 5011; 
GFX10-NEXT: s_movk_i32 s4, 0x7f 5012; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 5013; GFX10-NEXT: v_lshrrev_b32_e32 v10, 31, v1 5014; GFX10-NEXT: v_and_b32_e32 v19, s4, v8 5015; GFX10-NEXT: v_and_b32_e32 v18, s4, v9 5016; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 5017; GFX10-NEXT: v_or_b32_e32 v2, v2, v10 5018; GFX10-NEXT: v_sub_nc_u32_e32 v16, 64, v19 5019; GFX10-NEXT: v_sub_nc_u32_e32 v10, 64, v18 5020; GFX10-NEXT: v_subrev_nc_u32_e32 v21, 64, v18 5021; GFX10-NEXT: v_subrev_nc_u32_e32 v20, 64, v19 5022; GFX10-NEXT: v_lshlrev_b64 v[8:9], v18, v[2:3] 5023; GFX10-NEXT: v_lshrrev_b64 v[12:13], v19, v[4:5] 5024; GFX10-NEXT: v_lshrrev_b64 v[10:11], v10, v[0:1] 5025; GFX10-NEXT: v_lshlrev_b64 v[16:17], v16, v[6:7] 5026; GFX10-NEXT: v_lshlrev_b64 v[14:15], v18, v[0:1] 5027; GFX10-NEXT: v_lshlrev_b64 v[0:1], v21, v[0:1] 5028; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v18 5029; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v19 5030; GFX10-NEXT: v_or_b32_e32 v10, v10, v8 5031; GFX10-NEXT: v_or_b32_e32 v11, v11, v9 5032; GFX10-NEXT: v_lshrrev_b64 v[8:9], v20, v[6:7] 5033; GFX10-NEXT: v_or_b32_e32 v12, v12, v16 5034; GFX10-NEXT: v_or_b32_e32 v13, v13, v17 5035; GFX10-NEXT: v_cndmask_b32_e32 v10, v0, v10, vcc_lo 5036; GFX10-NEXT: v_cndmask_b32_e32 v11, v1, v11, vcc_lo 5037; GFX10-NEXT: v_lshrrev_b64 v[0:1], v19, v[6:7] 5038; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v12, s4 5039; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 0, v19 5040; GFX10-NEXT: v_cmp_eq_u32_e64 s6, 0, v18 5041; GFX10-NEXT: v_cndmask_b32_e64 v6, v9, v13, s4 5042; GFX10-NEXT: v_cndmask_b32_e32 v14, 0, v14, vcc_lo 5043; GFX10-NEXT: v_cndmask_b32_e32 v7, 0, v15, vcc_lo 5044; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, v4, s5 5045; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, v2, s6 5046; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, v3, s6 5047; GFX10-NEXT: v_cndmask_b32_e64 v5, v6, v5, s5 5048; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, v0, s4 5049; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, v1, s4 5050; GFX10-NEXT: v_or_b32_e32 v0, v14, v4 5051; GFX10-NEXT: 
v_or_b32_e32 v1, v7, v5 5052; GFX10-NEXT: v_or_b32_e32 v2, v2, v6 5053; GFX10-NEXT: v_or_b32_e32 v3, v3, v8 5054; GFX10-NEXT: s_setpc_b64 s[30:31] 5055 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt) 5056 ret i128 %result 5057} 5058 5059define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs, i128 %amt) { 5060; GFX6-LABEL: v_fshr_i128_ssv: 5061; GFX6: ; %bb.0: 5062; GFX6-NEXT: s_movk_i32 s8, 0x7f 5063; GFX6-NEXT: v_and_b32_e32 v6, s8, v0 5064; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0 5065; GFX6-NEXT: s_mov_b32 s9, 0 5066; GFX6-NEXT: v_and_b32_e32 v7, s8, v0 5067; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 5068; GFX6-NEXT: s_lshr_b32 s8, s1, 31 5069; GFX6-NEXT: s_lshl_b64 s[10:11], s[0:1], 1 5070; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9] 5071; GFX6-NEXT: v_sub_i32_e32 v0, vcc, 64, v7 5072; GFX6-NEXT: v_lshr_b64 v[0:1], s[10:11], v0 5073; GFX6-NEXT: v_lshl_b64 v[2:3], s[0:1], v7 5074; GFX6-NEXT: v_subrev_i32_e32 v8, vcc, 64, v7 5075; GFX6-NEXT: v_lshl_b64 v[4:5], s[10:11], v7 5076; GFX6-NEXT: v_or_b32_e32 v2, v0, v2 5077; GFX6-NEXT: v_or_b32_e32 v3, v1, v3 5078; GFX6-NEXT: v_lshl_b64 v[0:1], s[10:11], v8 5079; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7 5080; GFX6-NEXT: v_cndmask_b32_e32 v8, 0, v4, vcc 5081; GFX6-NEXT: v_cndmask_b32_e32 v9, 0, v5, vcc 5082; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 5083; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 5084; GFX6-NEXT: v_mov_b32_e32 v2, s0 5085; GFX6-NEXT: v_mov_b32_e32 v3, s1 5086; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 5087; GFX6-NEXT: v_cndmask_b32_e32 v7, v0, v2, vcc 5088; GFX6-NEXT: v_cndmask_b32_e32 v10, v1, v3, vcc 5089; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 64, v6 5090; GFX6-NEXT: v_lshr_b64 v[0:1], s[4:5], v6 5091; GFX6-NEXT: v_lshl_b64 v[2:3], s[6:7], v2 5092; GFX6-NEXT: v_subrev_i32_e32 v11, vcc, 64, v6 5093; GFX6-NEXT: v_or_b32_e32 v2, v0, v2 5094; GFX6-NEXT: v_or_b32_e32 v3, v1, v3 5095; GFX6-NEXT: v_lshr_b64 v[0:1], s[6:7], v11 5096; GFX6-NEXT: v_lshr_b64 v[4:5], s[6:7], v6 
5097; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6 5098; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 5099; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 5100; GFX6-NEXT: v_mov_b32_e32 v2, s4 5101; GFX6-NEXT: v_mov_b32_e32 v3, s5 5102; GFX6-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v6 5103; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] 5104; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] 5105; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc 5106; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc 5107; GFX6-NEXT: v_or_b32_e32 v0, v8, v0 5108; GFX6-NEXT: v_or_b32_e32 v1, v9, v1 5109; GFX6-NEXT: v_or_b32_e32 v2, v7, v2 5110; GFX6-NEXT: v_or_b32_e32 v3, v10, v3 5111; GFX6-NEXT: ; return to shader part epilog 5112; 5113; GFX8-LABEL: v_fshr_i128_ssv: 5114; GFX8: ; %bb.0: 5115; GFX8-NEXT: s_movk_i32 s8, 0x7f 5116; GFX8-NEXT: v_and_b32_e32 v6, s8, v0 5117; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0 5118; GFX8-NEXT: s_mov_b32 s9, 0 5119; GFX8-NEXT: v_and_b32_e32 v7, s8, v0 5120; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 5121; GFX8-NEXT: s_lshr_b32 s8, s1, 31 5122; GFX8-NEXT: s_lshl_b64 s[10:11], s[0:1], 1 5123; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9] 5124; GFX8-NEXT: v_sub_u32_e32 v0, vcc, 64, v7 5125; GFX8-NEXT: v_lshrrev_b64 v[0:1], v0, s[10:11] 5126; GFX8-NEXT: v_lshlrev_b64 v[2:3], v7, s[0:1] 5127; GFX8-NEXT: v_subrev_u32_e32 v8, vcc, 64, v7 5128; GFX8-NEXT: v_lshlrev_b64 v[4:5], v7, s[10:11] 5129; GFX8-NEXT: v_or_b32_e32 v2, v0, v2 5130; GFX8-NEXT: v_or_b32_e32 v3, v1, v3 5131; GFX8-NEXT: v_lshlrev_b64 v[0:1], v8, s[10:11] 5132; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7 5133; GFX8-NEXT: v_cndmask_b32_e32 v8, 0, v4, vcc 5134; GFX8-NEXT: v_cndmask_b32_e32 v9, 0, v5, vcc 5135; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 5136; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 5137; GFX8-NEXT: v_mov_b32_e32 v2, s0 5138; GFX8-NEXT: v_mov_b32_e32 v3, s1 5139; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 5140; GFX8-NEXT: v_cndmask_b32_e32 v7, v0, v2, vcc 5141; GFX8-NEXT: v_cndmask_b32_e32 v10, v1, v3, vcc 5142; 
GFX8-NEXT: v_sub_u32_e32 v2, vcc, 64, v6 5143; GFX8-NEXT: v_lshrrev_b64 v[0:1], v6, s[4:5] 5144; GFX8-NEXT: v_lshlrev_b64 v[2:3], v2, s[6:7] 5145; GFX8-NEXT: v_subrev_u32_e32 v11, vcc, 64, v6 5146; GFX8-NEXT: v_or_b32_e32 v2, v0, v2 5147; GFX8-NEXT: v_or_b32_e32 v3, v1, v3 5148; GFX8-NEXT: v_lshrrev_b64 v[0:1], v11, s[6:7] 5149; GFX8-NEXT: v_lshrrev_b64 v[4:5], v6, s[6:7] 5150; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6 5151; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 5152; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 5153; GFX8-NEXT: v_mov_b32_e32 v2, s4 5154; GFX8-NEXT: v_mov_b32_e32 v3, s5 5155; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v6 5156; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] 5157; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] 5158; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc 5159; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc 5160; GFX8-NEXT: v_or_b32_e32 v0, v8, v0 5161; GFX8-NEXT: v_or_b32_e32 v1, v9, v1 5162; GFX8-NEXT: v_or_b32_e32 v2, v7, v2 5163; GFX8-NEXT: v_or_b32_e32 v3, v10, v3 5164; GFX8-NEXT: ; return to shader part epilog 5165; 5166; GFX9-LABEL: v_fshr_i128_ssv: 5167; GFX9: ; %bb.0: 5168; GFX9-NEXT: s_movk_i32 s8, 0x7f 5169; GFX9-NEXT: v_and_b32_e32 v6, s8, v0 5170; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0 5171; GFX9-NEXT: s_mov_b32 s9, 0 5172; GFX9-NEXT: v_and_b32_e32 v7, s8, v0 5173; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 5174; GFX9-NEXT: s_lshr_b32 s8, s1, 31 5175; GFX9-NEXT: s_lshl_b64 s[10:11], s[0:1], 1 5176; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9] 5177; GFX9-NEXT: v_sub_u32_e32 v0, 64, v7 5178; GFX9-NEXT: v_lshrrev_b64 v[0:1], v0, s[10:11] 5179; GFX9-NEXT: v_lshlrev_b64 v[2:3], v7, s[0:1] 5180; GFX9-NEXT: v_subrev_u32_e32 v8, 64, v7 5181; GFX9-NEXT: v_lshlrev_b64 v[4:5], v7, s[10:11] 5182; GFX9-NEXT: v_or_b32_e32 v2, v0, v2 5183; GFX9-NEXT: v_or_b32_e32 v3, v1, v3 5184; GFX9-NEXT: v_lshlrev_b64 v[0:1], v8, s[10:11] 5185; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7 5186; GFX9-NEXT: v_cndmask_b32_e32 v8, 0, v4, vcc 5187; GFX9-NEXT: 
v_cndmask_b32_e32 v9, 0, v5, vcc 5188; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 5189; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 5190; GFX9-NEXT: v_mov_b32_e32 v2, s0 5191; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 5192; GFX9-NEXT: v_mov_b32_e32 v3, s1 5193; GFX9-NEXT: v_cndmask_b32_e32 v7, v0, v2, vcc 5194; GFX9-NEXT: v_sub_u32_e32 v2, 64, v6 5195; GFX9-NEXT: v_cndmask_b32_e32 v10, v1, v3, vcc 5196; GFX9-NEXT: v_lshrrev_b64 v[0:1], v6, s[4:5] 5197; GFX9-NEXT: v_lshlrev_b64 v[2:3], v2, s[6:7] 5198; GFX9-NEXT: v_subrev_u32_e32 v11, 64, v6 5199; GFX9-NEXT: v_or_b32_e32 v2, v0, v2 5200; GFX9-NEXT: v_or_b32_e32 v3, v1, v3 5201; GFX9-NEXT: v_lshrrev_b64 v[0:1], v11, s[6:7] 5202; GFX9-NEXT: v_lshrrev_b64 v[4:5], v6, s[6:7] 5203; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v6 5204; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 5205; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 5206; GFX9-NEXT: v_mov_b32_e32 v2, s4 5207; GFX9-NEXT: v_mov_b32_e32 v3, s5 5208; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v6 5209; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] 5210; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] 5211; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc 5212; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc 5213; GFX9-NEXT: v_or_b32_e32 v0, v8, v0 5214; GFX9-NEXT: v_or_b32_e32 v1, v9, v1 5215; GFX9-NEXT: v_or_b32_e32 v2, v7, v2 5216; GFX9-NEXT: v_or_b32_e32 v3, v10, v3 5217; GFX9-NEXT: ; return to shader part epilog 5218; 5219; GFX10-LABEL: v_fshr_i128_ssv: 5220; GFX10: ; %bb.0: 5221; GFX10-NEXT: v_xor_b32_e32 v1, -1, v0 5222; GFX10-NEXT: s_movk_i32 s10, 0x7f 5223; GFX10-NEXT: s_mov_b32 s9, 0 5224; GFX10-NEXT: v_and_b32_e32 v13, s10, v0 5225; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 5226; GFX10-NEXT: v_and_b32_e32 v12, s10, v1 5227; GFX10-NEXT: s_lshr_b32 s8, s1, 31 5228; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5229; GFX10-NEXT: v_sub_nc_u32_e32 v8, 64, v13 5230; GFX10-NEXT: s_or_b64 s[8:9], s[2:3], s[8:9] 5231; GFX10-NEXT: v_sub_nc_u32_e32 v2, 64, v12 5232; GFX10-NEXT: 
v_lshlrev_b64 v[0:1], v12, s[8:9] 5233; GFX10-NEXT: v_subrev_nc_u32_e32 v10, 64, v12 5234; GFX10-NEXT: v_subrev_nc_u32_e32 v14, 64, v13 5235; GFX10-NEXT: v_lshrrev_b64 v[4:5], v13, s[4:5] 5236; GFX10-NEXT: v_lshrrev_b64 v[2:3], v2, s[0:1] 5237; GFX10-NEXT: v_lshlrev_b64 v[8:9], v8, s[6:7] 5238; GFX10-NEXT: v_lshlrev_b64 v[10:11], v10, s[0:1] 5239; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v12 5240; GFX10-NEXT: v_lshlrev_b64 v[6:7], v12, s[0:1] 5241; GFX10-NEXT: v_cmp_gt_u32_e64 s0, 64, v13 5242; GFX10-NEXT: v_or_b32_e32 v2, v2, v0 5243; GFX10-NEXT: v_or_b32_e32 v3, v3, v1 5244; GFX10-NEXT: v_lshrrev_b64 v[0:1], v14, s[6:7] 5245; GFX10-NEXT: v_or_b32_e32 v4, v4, v8 5246; GFX10-NEXT: v_or_b32_e32 v5, v5, v9 5247; GFX10-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc_lo 5248; GFX10-NEXT: v_cndmask_b32_e32 v10, v11, v3, vcc_lo 5249; GFX10-NEXT: v_lshrrev_b64 v[2:3], v13, s[6:7] 5250; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v4, s0 5251; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 0, v13 5252; GFX10-NEXT: v_cmp_eq_u32_e64 s2, 0, v12 5253; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v5, s0 5254; GFX10-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc_lo 5255; GFX10-NEXT: v_cndmask_b32_e32 v4, 0, v7, vcc_lo 5256; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s4, s1 5257; GFX10-NEXT: v_cndmask_b32_e64 v5, v8, s8, s2 5258; GFX10-NEXT: v_cndmask_b32_e64 v7, v10, s9, s2 5259; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s5, s1 5260; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v2, s0 5261; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, v3, s0 5262; GFX10-NEXT: v_or_b32_e32 v0, v6, v0 5263; GFX10-NEXT: v_or_b32_e32 v1, v4, v1 5264; GFX10-NEXT: v_or_b32_e32 v2, v5, v2 5265; GFX10-NEXT: v_or_b32_e32 v3, v7, v3 5266; GFX10-NEXT: ; return to shader part epilog 5267 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt) 5268 %cast.result = bitcast i128 %result to <4 x float> 5269 ret <4 x float> %cast.result 5270} 5271 5272define amdgpu_ps <4 x float> @v_fshr_i128_svs(i128 inreg %lhs, i128 %rhs, i128 inreg %amt) { 5273; GFX6-LABEL: 
v_fshr_i128_svs: 5274; GFX6: ; %bb.0: 5275; GFX6-NEXT: s_movk_i32 s6, 0x7f 5276; GFX6-NEXT: s_mov_b32 s7, 0 5277; GFX6-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] 5278; GFX6-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] 5279; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 5280; GFX6-NEXT: s_lshr_b32 s6, s1, 31 5281; GFX6-NEXT: s_lshl_b64 s[10:11], s[0:1], 1 5282; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[6:7] 5283; GFX6-NEXT: s_sub_i32 s9, s4, 64 5284; GFX6-NEXT: s_sub_i32 s5, 64, s4 5285; GFX6-NEXT: s_cmp_lt_u32 s4, 64 5286; GFX6-NEXT: s_cselect_b32 s12, 1, 0 5287; GFX6-NEXT: s_cmp_eq_u32 s4, 0 5288; GFX6-NEXT: s_cselect_b32 s13, 1, 0 5289; GFX6-NEXT: s_lshl_b64 s[2:3], s[10:11], s4 5290; GFX6-NEXT: s_lshr_b64 s[6:7], s[10:11], s5 5291; GFX6-NEXT: s_lshl_b64 s[4:5], s[0:1], s4 5292; GFX6-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5] 5293; GFX6-NEXT: s_lshl_b64 s[6:7], s[10:11], s9 5294; GFX6-NEXT: s_cmp_lg_u32 s12, 0 5295; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 5296; GFX6-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] 5297; GFX6-NEXT: s_cmp_lg_u32 s13, 0 5298; GFX6-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5] 5299; GFX6-NEXT: s_sub_i32 s4, s8, 64 5300; GFX6-NEXT: s_sub_i32 s5, 64, s8 5301; GFX6-NEXT: s_cmp_lt_u32 s8, 64 5302; GFX6-NEXT: s_cselect_b32 s6, 1, 0 5303; GFX6-NEXT: s_cmp_eq_u32 s8, 0 5304; GFX6-NEXT: v_lshr_b64 v[4:5], v[0:1], s8 5305; GFX6-NEXT: v_lshl_b64 v[6:7], v[2:3], s5 5306; GFX6-NEXT: s_cselect_b32 s7, 1, 0 5307; GFX6-NEXT: v_lshr_b64 v[8:9], v[2:3], s8 5308; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], s4 5309; GFX6-NEXT: s_and_b32 s4, 1, s6 5310; GFX6-NEXT: v_or_b32_e32 v4, v4, v6 5311; GFX6-NEXT: v_or_b32_e32 v5, v5, v7 5312; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 5313; GFX6-NEXT: s_and_b32 s4, 1, s7 5314; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 5315; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc 5316; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 5317; GFX6-NEXT: s_and_b32 s4, 1, s6 5318; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 5319; GFX6-NEXT: v_cndmask_b32_e32 v1, v3, v1, 
vcc 5320; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 5321; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc 5322; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc 5323; GFX6-NEXT: v_or_b32_e32 v0, s2, v0 5324; GFX6-NEXT: v_or_b32_e32 v1, s3, v1 5325; GFX6-NEXT: v_or_b32_e32 v2, s0, v2 5326; GFX6-NEXT: v_or_b32_e32 v3, s1, v3 5327; GFX6-NEXT: ; return to shader part epilog 5328; 5329; GFX8-LABEL: v_fshr_i128_svs: 5330; GFX8: ; %bb.0: 5331; GFX8-NEXT: s_movk_i32 s6, 0x7f 5332; GFX8-NEXT: s_mov_b32 s7, 0 5333; GFX8-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] 5334; GFX8-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] 5335; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 5336; GFX8-NEXT: s_lshr_b32 s6, s1, 31 5337; GFX8-NEXT: s_lshl_b64 s[10:11], s[0:1], 1 5338; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[6:7] 5339; GFX8-NEXT: s_sub_i32 s9, s4, 64 5340; GFX8-NEXT: s_sub_i32 s5, 64, s4 5341; GFX8-NEXT: s_cmp_lt_u32 s4, 64 5342; GFX8-NEXT: s_cselect_b32 s12, 1, 0 5343; GFX8-NEXT: s_cmp_eq_u32 s4, 0 5344; GFX8-NEXT: s_cselect_b32 s13, 1, 0 5345; GFX8-NEXT: s_lshl_b64 s[2:3], s[10:11], s4 5346; GFX8-NEXT: s_lshr_b64 s[6:7], s[10:11], s5 5347; GFX8-NEXT: s_lshl_b64 s[4:5], s[0:1], s4 5348; GFX8-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5] 5349; GFX8-NEXT: s_lshl_b64 s[6:7], s[10:11], s9 5350; GFX8-NEXT: s_cmp_lg_u32 s12, 0 5351; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 5352; GFX8-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] 5353; GFX8-NEXT: s_cmp_lg_u32 s13, 0 5354; GFX8-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5] 5355; GFX8-NEXT: s_sub_i32 s4, s8, 64 5356; GFX8-NEXT: s_sub_i32 s5, 64, s8 5357; GFX8-NEXT: s_cmp_lt_u32 s8, 64 5358; GFX8-NEXT: s_cselect_b32 s6, 1, 0 5359; GFX8-NEXT: s_cmp_eq_u32 s8, 0 5360; GFX8-NEXT: v_lshrrev_b64 v[4:5], s8, v[0:1] 5361; GFX8-NEXT: v_lshlrev_b64 v[6:7], s5, v[2:3] 5362; GFX8-NEXT: s_cselect_b32 s7, 1, 0 5363; GFX8-NEXT: v_lshrrev_b64 v[8:9], s8, v[2:3] 5364; GFX8-NEXT: v_lshrrev_b64 v[2:3], s4, v[2:3] 5365; GFX8-NEXT: s_and_b32 s4, 1, s6 5366; GFX8-NEXT: v_or_b32_e32 v4, v4, v6 5367; 
GFX8-NEXT: v_or_b32_e32 v5, v5, v7 5368; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 5369; GFX8-NEXT: s_and_b32 s4, 1, s7 5370; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 5371; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc 5372; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 5373; GFX8-NEXT: s_and_b32 s4, 1, s6 5374; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 5375; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc 5376; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 5377; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc 5378; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc 5379; GFX8-NEXT: v_or_b32_e32 v0, s2, v0 5380; GFX8-NEXT: v_or_b32_e32 v1, s3, v1 5381; GFX8-NEXT: v_or_b32_e32 v2, s0, v2 5382; GFX8-NEXT: v_or_b32_e32 v3, s1, v3 5383; GFX8-NEXT: ; return to shader part epilog 5384; 5385; GFX9-LABEL: v_fshr_i128_svs: 5386; GFX9: ; %bb.0: 5387; GFX9-NEXT: s_movk_i32 s6, 0x7f 5388; GFX9-NEXT: s_mov_b32 s7, 0 5389; GFX9-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] 5390; GFX9-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] 5391; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 5392; GFX9-NEXT: s_lshr_b32 s6, s1, 31 5393; GFX9-NEXT: s_lshl_b64 s[10:11], s[0:1], 1 5394; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[6:7] 5395; GFX9-NEXT: s_sub_i32 s9, s4, 64 5396; GFX9-NEXT: s_sub_i32 s5, 64, s4 5397; GFX9-NEXT: s_cmp_lt_u32 s4, 64 5398; GFX9-NEXT: s_cselect_b32 s12, 1, 0 5399; GFX9-NEXT: s_cmp_eq_u32 s4, 0 5400; GFX9-NEXT: s_cselect_b32 s13, 1, 0 5401; GFX9-NEXT: s_lshl_b64 s[2:3], s[10:11], s4 5402; GFX9-NEXT: s_lshr_b64 s[6:7], s[10:11], s5 5403; GFX9-NEXT: s_lshl_b64 s[4:5], s[0:1], s4 5404; GFX9-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5] 5405; GFX9-NEXT: s_lshl_b64 s[6:7], s[10:11], s9 5406; GFX9-NEXT: s_cmp_lg_u32 s12, 0 5407; GFX9-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 5408; GFX9-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] 5409; GFX9-NEXT: s_cmp_lg_u32 s13, 0 5410; GFX9-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5] 5411; GFX9-NEXT: s_sub_i32 s4, s8, 64 5412; GFX9-NEXT: s_sub_i32 s5, 64, s8 5413; GFX9-NEXT: s_cmp_lt_u32 s8, 64 5414; 
GFX9-NEXT: s_cselect_b32 s6, 1, 0 5415; GFX9-NEXT: s_cmp_eq_u32 s8, 0 5416; GFX9-NEXT: v_lshrrev_b64 v[4:5], s8, v[0:1] 5417; GFX9-NEXT: v_lshlrev_b64 v[6:7], s5, v[2:3] 5418; GFX9-NEXT: s_cselect_b32 s7, 1, 0 5419; GFX9-NEXT: v_lshrrev_b64 v[8:9], s8, v[2:3] 5420; GFX9-NEXT: v_lshrrev_b64 v[2:3], s4, v[2:3] 5421; GFX9-NEXT: s_and_b32 s4, 1, s6 5422; GFX9-NEXT: v_or_b32_e32 v4, v4, v6 5423; GFX9-NEXT: v_or_b32_e32 v5, v5, v7 5424; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 5425; GFX9-NEXT: s_and_b32 s4, 1, s7 5426; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 5427; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc 5428; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 5429; GFX9-NEXT: s_and_b32 s4, 1, s6 5430; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 5431; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc 5432; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 5433; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc 5434; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc 5435; GFX9-NEXT: v_or_b32_e32 v0, s2, v0 5436; GFX9-NEXT: v_or_b32_e32 v1, s3, v1 5437; GFX9-NEXT: v_or_b32_e32 v2, s0, v2 5438; GFX9-NEXT: v_or_b32_e32 v3, s1, v3 5439; GFX9-NEXT: ; return to shader part epilog 5440; 5441; GFX10-LABEL: v_fshr_i128_svs: 5442; GFX10: ; %bb.0: 5443; GFX10-NEXT: s_movk_i32 s6, 0x7f 5444; GFX10-NEXT: s_mov_b32 s7, 0 5445; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 5446; GFX10-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] 5447; GFX10-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] 5448; GFX10-NEXT: s_lshr_b32 s6, s1, 31 5449; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5450; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7] 5451; GFX10-NEXT: s_sub_i32 s9, s4, 64 5452; GFX10-NEXT: s_sub_i32 s5, 64, s4 5453; GFX10-NEXT: s_cmp_lt_u32 s4, 64 5454; GFX10-NEXT: v_lshrrev_b64 v[4:5], s8, v[0:1] 5455; GFX10-NEXT: s_cselect_b32 s12, 1, 0 5456; GFX10-NEXT: s_cmp_eq_u32 s4, 0 5457; GFX10-NEXT: s_cselect_b32 s13, 1, 0 5458; GFX10-NEXT: s_lshr_b64 s[6:7], s[0:1], s5 5459; GFX10-NEXT: s_lshl_b64 s[10:11], s[2:3], s4 5460; GFX10-NEXT: s_lshl_b64 
s[4:5], s[0:1], s4 5461; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11] 5462; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s9 5463; GFX10-NEXT: s_cmp_lg_u32 s12, 0 5464; GFX10-NEXT: s_cselect_b64 s[4:5], s[4:5], 0 5465; GFX10-NEXT: s_cselect_b64 s[0:1], s[6:7], s[0:1] 5466; GFX10-NEXT: s_cmp_lg_u32 s13, 0 5467; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] 5468; GFX10-NEXT: s_sub_i32 s0, 64, s8 5469; GFX10-NEXT: v_lshlrev_b64 v[6:7], s0, v[2:3] 5470; GFX10-NEXT: s_sub_i32 s0, s8, 64 5471; GFX10-NEXT: s_cmp_lt_u32 s8, 64 5472; GFX10-NEXT: v_lshrrev_b64 v[8:9], s0, v[2:3] 5473; GFX10-NEXT: s_cselect_b32 s1, 1, 0 5474; GFX10-NEXT: s_cmp_eq_u32 s8, 0 5475; GFX10-NEXT: v_or_b32_e32 v4, v4, v6 5476; GFX10-NEXT: s_cselect_b32 s6, 1, 0 5477; GFX10-NEXT: s_and_b32 s0, 1, s1 5478; GFX10-NEXT: v_or_b32_e32 v5, v5, v7 5479; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 5480; GFX10-NEXT: s_and_b32 s0, 1, s6 5481; GFX10-NEXT: s_and_b32 s1, 1, s1 5482; GFX10-NEXT: v_lshrrev_b64 v[2:3], s8, v[2:3] 5483; GFX10-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc_lo 5484; GFX10-NEXT: v_cndmask_b32_e32 v5, v9, v5, vcc_lo 5485; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s0 5486; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, s1 5487; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc_lo 5488; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo 5489; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v2, s0 5490; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, v3, s0 5491; GFX10-NEXT: v_or_b32_e32 v0, s4, v0 5492; GFX10-NEXT: v_or_b32_e32 v1, s5, v1 5493; GFX10-NEXT: v_or_b32_e32 v2, s2, v2 5494; GFX10-NEXT: v_or_b32_e32 v3, s3, v3 5495; GFX10-NEXT: ; return to shader part epilog 5496 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt) 5497 %cast.result = bitcast i128 %result to <4 x float> 5498 ret <4 x float> %cast.result 5499} 5500 5501define amdgpu_ps <4 x float> @v_fshr_i128_vss(i128 %lhs, i128 inreg %rhs, i128 inreg %amt) { 5502; GFX6-LABEL: v_fshr_i128_vss: 5503; GFX6: ; %bb.0: 5504; GFX6-NEXT: s_mov_b64 s[6:7], 0x7f 5505; 
GFX6-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] 5506; GFX6-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] 5507; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 5508; GFX6-NEXT: s_sub_i32 s5, s4, 64 5509; GFX6-NEXT: s_sub_i32 s6, 64, s4 5510; GFX6-NEXT: v_lshl_b64 v[4:5], v[0:1], 1 5511; GFX6-NEXT: v_lshrrev_b32_e32 v0, 31, v1 5512; GFX6-NEXT: s_cmp_lt_u32 s4, 64 5513; GFX6-NEXT: v_or_b32_e32 v2, v2, v0 5514; GFX6-NEXT: s_cselect_b32 s7, 1, 0 5515; GFX6-NEXT: s_cmp_eq_u32 s4, 0 5516; GFX6-NEXT: s_cselect_b32 s9, 1, 0 5517; GFX6-NEXT: v_lshr_b64 v[0:1], v[4:5], s6 5518; GFX6-NEXT: v_lshl_b64 v[6:7], v[2:3], s4 5519; GFX6-NEXT: v_lshl_b64 v[8:9], v[4:5], s4 5520; GFX6-NEXT: s_and_b32 s4, 1, s7 5521; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 5522; GFX6-NEXT: s_and_b32 s4, 1, s9 5523; GFX6-NEXT: s_sub_i32 s10, s8, 64 5524; GFX6-NEXT: s_sub_i32 s9, 64, s8 5525; GFX6-NEXT: v_or_b32_e32 v6, v0, v6 5526; GFX6-NEXT: v_or_b32_e32 v7, v1, v7 5527; GFX6-NEXT: v_lshl_b64 v[0:1], v[4:5], s5 5528; GFX6-NEXT: s_cmp_lt_u32 s8, 64 5529; GFX6-NEXT: s_cselect_b32 s11, 1, 0 5530; GFX6-NEXT: s_cmp_eq_u32 s8, 0 5531; GFX6-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc 5532; GFX6-NEXT: v_cndmask_b32_e32 v5, 0, v9, vcc 5533; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 5534; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 5535; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 5536; GFX6-NEXT: s_cselect_b32 s12, 1, 0 5537; GFX6-NEXT: s_lshr_b64 s[4:5], s[2:3], s8 5538; GFX6-NEXT: s_lshr_b64 s[6:7], s[0:1], s8 5539; GFX6-NEXT: s_lshl_b64 s[8:9], s[2:3], s9 5540; GFX6-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] 5541; GFX6-NEXT: s_lshr_b64 s[2:3], s[2:3], s10 5542; GFX6-NEXT: s_cmp_lg_u32 s11, 0 5543; GFX6-NEXT: s_cselect_b64 s[2:3], s[6:7], s[2:3] 5544; GFX6-NEXT: s_cmp_lg_u32 s12, 0 5545; GFX6-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] 5546; GFX6-NEXT: s_cmp_lg_u32 s11, 0 5547; GFX6-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc 5548; GFX6-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc 5549; GFX6-NEXT: s_cselect_b64 s[2:3], s[4:5], 0 5550; GFX6-NEXT: 
v_or_b32_e32 v0, s0, v4 5551; GFX6-NEXT: v_or_b32_e32 v1, s1, v5 5552; GFX6-NEXT: v_or_b32_e32 v2, s2, v2 5553; GFX6-NEXT: v_or_b32_e32 v3, s3, v3 5554; GFX6-NEXT: ; return to shader part epilog 5555; 5556; GFX8-LABEL: v_fshr_i128_vss: 5557; GFX8: ; %bb.0: 5558; GFX8-NEXT: s_mov_b64 s[6:7], 0x7f 5559; GFX8-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] 5560; GFX8-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] 5561; GFX8-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 5562; GFX8-NEXT: s_sub_i32 s5, s4, 64 5563; GFX8-NEXT: s_sub_i32 s6, 64, s4 5564; GFX8-NEXT: v_lshlrev_b64 v[4:5], 1, v[0:1] 5565; GFX8-NEXT: v_lshrrev_b32_e32 v0, 31, v1 5566; GFX8-NEXT: s_cmp_lt_u32 s4, 64 5567; GFX8-NEXT: v_or_b32_e32 v2, v2, v0 5568; GFX8-NEXT: s_cselect_b32 s7, 1, 0 5569; GFX8-NEXT: s_cmp_eq_u32 s4, 0 5570; GFX8-NEXT: s_cselect_b32 s9, 1, 0 5571; GFX8-NEXT: v_lshrrev_b64 v[0:1], s6, v[4:5] 5572; GFX8-NEXT: v_lshlrev_b64 v[6:7], s4, v[2:3] 5573; GFX8-NEXT: v_lshlrev_b64 v[8:9], s4, v[4:5] 5574; GFX8-NEXT: s_and_b32 s4, 1, s7 5575; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 5576; GFX8-NEXT: s_and_b32 s4, 1, s9 5577; GFX8-NEXT: s_sub_i32 s10, s8, 64 5578; GFX8-NEXT: s_sub_i32 s9, 64, s8 5579; GFX8-NEXT: v_or_b32_e32 v6, v0, v6 5580; GFX8-NEXT: v_or_b32_e32 v7, v1, v7 5581; GFX8-NEXT: v_lshlrev_b64 v[0:1], s5, v[4:5] 5582; GFX8-NEXT: s_cmp_lt_u32 s8, 64 5583; GFX8-NEXT: s_cselect_b32 s11, 1, 0 5584; GFX8-NEXT: s_cmp_eq_u32 s8, 0 5585; GFX8-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc 5586; GFX8-NEXT: v_cndmask_b32_e32 v5, 0, v9, vcc 5587; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 5588; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 5589; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 5590; GFX8-NEXT: s_cselect_b32 s12, 1, 0 5591; GFX8-NEXT: s_lshr_b64 s[4:5], s[2:3], s8 5592; GFX8-NEXT: s_lshr_b64 s[6:7], s[0:1], s8 5593; GFX8-NEXT: s_lshl_b64 s[8:9], s[2:3], s9 5594; GFX8-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] 5595; GFX8-NEXT: s_lshr_b64 s[2:3], s[2:3], s10 5596; GFX8-NEXT: s_cmp_lg_u32 s11, 0 5597; GFX8-NEXT: s_cselect_b64 
s[2:3], s[6:7], s[2:3] 5598; GFX8-NEXT: s_cmp_lg_u32 s12, 0 5599; GFX8-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] 5600; GFX8-NEXT: s_cmp_lg_u32 s11, 0 5601; GFX8-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc 5602; GFX8-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc 5603; GFX8-NEXT: s_cselect_b64 s[2:3], s[4:5], 0 5604; GFX8-NEXT: v_or_b32_e32 v0, s0, v4 5605; GFX8-NEXT: v_or_b32_e32 v1, s1, v5 5606; GFX8-NEXT: v_or_b32_e32 v2, s2, v2 5607; GFX8-NEXT: v_or_b32_e32 v3, s3, v3 5608; GFX8-NEXT: ; return to shader part epilog 5609; 5610; GFX9-LABEL: v_fshr_i128_vss: 5611; GFX9: ; %bb.0: 5612; GFX9-NEXT: s_mov_b64 s[6:7], 0x7f 5613; GFX9-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] 5614; GFX9-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] 5615; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 5616; GFX9-NEXT: s_sub_i32 s5, s4, 64 5617; GFX9-NEXT: s_sub_i32 s6, 64, s4 5618; GFX9-NEXT: v_lshlrev_b64 v[4:5], 1, v[0:1] 5619; GFX9-NEXT: v_lshrrev_b32_e32 v0, 31, v1 5620; GFX9-NEXT: s_cmp_lt_u32 s4, 64 5621; GFX9-NEXT: v_or_b32_e32 v2, v2, v0 5622; GFX9-NEXT: s_cselect_b32 s7, 1, 0 5623; GFX9-NEXT: s_cmp_eq_u32 s4, 0 5624; GFX9-NEXT: s_cselect_b32 s9, 1, 0 5625; GFX9-NEXT: v_lshrrev_b64 v[0:1], s6, v[4:5] 5626; GFX9-NEXT: v_lshlrev_b64 v[6:7], s4, v[2:3] 5627; GFX9-NEXT: v_lshlrev_b64 v[8:9], s4, v[4:5] 5628; GFX9-NEXT: s_and_b32 s4, 1, s7 5629; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 5630; GFX9-NEXT: s_and_b32 s4, 1, s9 5631; GFX9-NEXT: s_sub_i32 s10, s8, 64 5632; GFX9-NEXT: s_sub_i32 s9, 64, s8 5633; GFX9-NEXT: v_or_b32_e32 v6, v0, v6 5634; GFX9-NEXT: v_or_b32_e32 v7, v1, v7 5635; GFX9-NEXT: v_lshlrev_b64 v[0:1], s5, v[4:5] 5636; GFX9-NEXT: s_cmp_lt_u32 s8, 64 5637; GFX9-NEXT: s_cselect_b32 s11, 1, 0 5638; GFX9-NEXT: s_cmp_eq_u32 s8, 0 5639; GFX9-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc 5640; GFX9-NEXT: v_cndmask_b32_e32 v5, 0, v9, vcc 5641; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 5642; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 5643; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s4 5644; GFX9-NEXT: 
s_cselect_b32 s12, 1, 0 5645; GFX9-NEXT: s_lshr_b64 s[4:5], s[2:3], s8 5646; GFX9-NEXT: s_lshr_b64 s[6:7], s[0:1], s8 5647; GFX9-NEXT: s_lshl_b64 s[8:9], s[2:3], s9 5648; GFX9-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] 5649; GFX9-NEXT: s_lshr_b64 s[2:3], s[2:3], s10 5650; GFX9-NEXT: s_cmp_lg_u32 s11, 0 5651; GFX9-NEXT: s_cselect_b64 s[2:3], s[6:7], s[2:3] 5652; GFX9-NEXT: s_cmp_lg_u32 s12, 0 5653; GFX9-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] 5654; GFX9-NEXT: s_cmp_lg_u32 s11, 0 5655; GFX9-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc 5656; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc 5657; GFX9-NEXT: s_cselect_b64 s[2:3], s[4:5], 0 5658; GFX9-NEXT: v_or_b32_e32 v0, s0, v4 5659; GFX9-NEXT: v_or_b32_e32 v1, s1, v5 5660; GFX9-NEXT: v_or_b32_e32 v2, s2, v2 5661; GFX9-NEXT: v_or_b32_e32 v3, s3, v3 5662; GFX9-NEXT: ; return to shader part epilog 5663; 5664; GFX10-LABEL: v_fshr_i128_vss: 5665; GFX10: ; %bb.0: 5666; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 5667; GFX10-NEXT: v_lshrrev_b32_e32 v4, 31, v1 5668; GFX10-NEXT: s_mov_b64 s[6:7], 0x7f 5669; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 5670; GFX10-NEXT: s_and_b64 s[8:9], s[4:5], s[6:7] 5671; GFX10-NEXT: s_andn2_b64 s[4:5], s[6:7], s[4:5] 5672; GFX10-NEXT: v_or_b32_e32 v2, v2, v4 5673; GFX10-NEXT: s_sub_i32 s6, 64, s4 5674; GFX10-NEXT: s_sub_i32 s5, s4, 64 5675; GFX10-NEXT: s_cmp_lt_u32 s4, 64 5676; GFX10-NEXT: v_lshrrev_b64 v[4:5], s6, v[0:1] 5677; GFX10-NEXT: v_lshlrev_b64 v[6:7], s4, v[2:3] 5678; GFX10-NEXT: s_cselect_b32 s7, 1, 0 5679; GFX10-NEXT: s_cmp_eq_u32 s4, 0 5680; GFX10-NEXT: v_lshlrev_b64 v[8:9], s4, v[0:1] 5681; GFX10-NEXT: s_cselect_b32 s9, 1, 0 5682; GFX10-NEXT: s_and_b32 s4, 1, s7 5683; GFX10-NEXT: v_lshlrev_b64 v[0:1], s5, v[0:1] 5684; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4 5685; GFX10-NEXT: v_or_b32_e32 v4, v4, v6 5686; GFX10-NEXT: v_or_b32_e32 v5, v5, v7 5687; GFX10-NEXT: s_and_b32 s4, 1, s9 5688; GFX10-NEXT: s_sub_i32 s10, s8, 64 5689; GFX10-NEXT: s_sub_i32 s6, 64, s8 5690; GFX10-NEXT: s_cmp_lt_u32 
s8, 64 5691; GFX10-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc_lo 5692; GFX10-NEXT: s_cselect_b32 s11, 1, 0 5693; GFX10-NEXT: s_cmp_eq_u32 s8, 0 5694; GFX10-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc_lo 5695; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 5696; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo 5697; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s4 5698; GFX10-NEXT: s_cselect_b32 s12, 1, 0 5699; GFX10-NEXT: s_lshr_b64 s[4:5], s[0:1], s8 5700; GFX10-NEXT: s_lshl_b64 s[6:7], s[2:3], s6 5701; GFX10-NEXT: s_lshr_b64 s[8:9], s[2:3], s8 5702; GFX10-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] 5703; GFX10-NEXT: s_lshr_b64 s[2:3], s[2:3], s10 5704; GFX10-NEXT: s_cmp_lg_u32 s11, 0 5705; GFX10-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc_lo 5706; GFX10-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3] 5707; GFX10-NEXT: s_cmp_lg_u32 s12, 0 5708; GFX10-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc_lo 5709; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] 5710; GFX10-NEXT: s_cmp_lg_u32 s11, 0 5711; GFX10-NEXT: v_or_b32_e32 v0, s0, v6 5712; GFX10-NEXT: s_cselect_b64 s[2:3], s[8:9], 0 5713; GFX10-NEXT: v_or_b32_e32 v1, s1, v7 5714; GFX10-NEXT: v_or_b32_e32 v2, s2, v2 5715; GFX10-NEXT: v_or_b32_e32 v3, s3, v3 5716; GFX10-NEXT: ; return to shader part epilog 5717 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt) 5718 %cast.result = bitcast i128 %result to <4 x float> 5719 ret <4 x float> %cast.result 5720} 5721 5722define amdgpu_ps i128 @s_fshr_i128_65(i128 inreg %lhs, i128 inreg %rhs) { 5723; GFX6-LABEL: s_fshr_i128_65: 5724; GFX6: ; %bb.0: 5725; GFX6-NEXT: s_mov_b32 s4, 0 5726; GFX6-NEXT: s_lshl_b32 s5, s0, 31 5727; GFX6-NEXT: s_lshl_b32 s3, s2, 31 5728; GFX6-NEXT: s_mov_b32 s2, s4 5729; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], 1 5730; GFX6-NEXT: s_or_b64 s[2:3], s[2:3], s[0:1] 5731; GFX6-NEXT: s_lshr_b64 s[0:1], s[6:7], 1 5732; GFX6-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1] 5733; GFX6-NEXT: ; return to shader part epilog 5734; 5735; GFX8-LABEL: s_fshr_i128_65: 5736; GFX8: ; %bb.0: 5737; 
GFX8-NEXT: s_mov_b32 s4, 0 5738; GFX8-NEXT: s_lshl_b32 s5, s0, 31 5739; GFX8-NEXT: s_lshl_b32 s3, s2, 31 5740; GFX8-NEXT: s_mov_b32 s2, s4 5741; GFX8-NEXT: s_lshr_b64 s[0:1], s[0:1], 1 5742; GFX8-NEXT: s_or_b64 s[2:3], s[2:3], s[0:1] 5743; GFX8-NEXT: s_lshr_b64 s[0:1], s[6:7], 1 5744; GFX8-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1] 5745; GFX8-NEXT: ; return to shader part epilog 5746; 5747; GFX9-LABEL: s_fshr_i128_65: 5748; GFX9: ; %bb.0: 5749; GFX9-NEXT: s_mov_b32 s4, 0 5750; GFX9-NEXT: s_lshl_b32 s5, s0, 31 5751; GFX9-NEXT: s_lshl_b32 s3, s2, 31 5752; GFX9-NEXT: s_mov_b32 s2, s4 5753; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], 1 5754; GFX9-NEXT: s_or_b64 s[2:3], s[2:3], s[0:1] 5755; GFX9-NEXT: s_lshr_b64 s[0:1], s[6:7], 1 5756; GFX9-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1] 5757; GFX9-NEXT: ; return to shader part epilog 5758; 5759; GFX10-LABEL: s_fshr_i128_65: 5760; GFX10: ; %bb.0: 5761; GFX10-NEXT: s_mov_b32 s4, 0 5762; GFX10-NEXT: s_lshl_b32 s5, s0, 31 5763; GFX10-NEXT: s_lshl_b32 s3, s2, 31 5764; GFX10-NEXT: s_mov_b32 s2, s4 5765; GFX10-NEXT: s_lshr_b64 s[6:7], s[6:7], 1 5766; GFX10-NEXT: s_lshr_b64 s[8:9], s[0:1], 1 5767; GFX10-NEXT: s_or_b64 s[0:1], s[4:5], s[6:7] 5768; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9] 5769; GFX10-NEXT: ; return to shader part epilog 5770 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 65) 5771 ret i128 %result 5772} 5773 5774define i128 @v_fshr_i128_65(i128 %lhs, i128 %rhs) { 5775; GFX6-LABEL: v_fshr_i128_65: 5776; GFX6: ; %bb.0: 5777; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5778; GFX6-NEXT: v_lshlrev_b32_e32 v4, 31, v0 5779; GFX6-NEXT: v_lshlrev_b32_e32 v5, 31, v2 5780; GFX6-NEXT: v_lshr_b64 v[2:3], v[0:1], 1 5781; GFX6-NEXT: v_lshr_b64 v[0:1], v[6:7], 1 5782; GFX6-NEXT: v_or_b32_e32 v3, v5, v3 5783; GFX6-NEXT: v_or_b32_e32 v1, v4, v1 5784; GFX6-NEXT: s_setpc_b64 s[30:31] 5785; 5786; GFX8-LABEL: v_fshr_i128_65: 5787; GFX8: ; %bb.0: 5788; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5789; GFX8-NEXT: 
v_lshlrev_b32_e32 v4, 31, v0 5790; GFX8-NEXT: v_lshlrev_b32_e32 v5, 31, v2 5791; GFX8-NEXT: v_lshrrev_b64 v[2:3], 1, v[0:1] 5792; GFX8-NEXT: v_lshrrev_b64 v[0:1], 1, v[6:7] 5793; GFX8-NEXT: v_or_b32_e32 v3, v5, v3 5794; GFX8-NEXT: v_or_b32_e32 v1, v4, v1 5795; GFX8-NEXT: s_setpc_b64 s[30:31] 5796; 5797; GFX9-LABEL: v_fshr_i128_65: 5798; GFX9: ; %bb.0: 5799; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5800; GFX9-NEXT: v_lshlrev_b32_e32 v4, 31, v0 5801; GFX9-NEXT: v_lshlrev_b32_e32 v5, 31, v2 5802; GFX9-NEXT: v_lshrrev_b64 v[2:3], 1, v[0:1] 5803; GFX9-NEXT: v_lshrrev_b64 v[0:1], 1, v[6:7] 5804; GFX9-NEXT: v_or_b32_e32 v3, v5, v3 5805; GFX9-NEXT: v_or_b32_e32 v1, v4, v1 5806; GFX9-NEXT: s_setpc_b64 s[30:31] 5807; 5808; GFX10-LABEL: v_fshr_i128_65: 5809; GFX10: ; %bb.0: 5810; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5811; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 5812; GFX10-NEXT: v_mov_b32_e32 v8, v2 5813; GFX10-NEXT: v_lshrrev_b64 v[4:5], 1, v[6:7] 5814; GFX10-NEXT: v_lshrrev_b64 v[2:3], 1, v[0:1] 5815; GFX10-NEXT: v_lshlrev_b32_e32 v9, 31, v0 5816; GFX10-NEXT: v_lshlrev_b32_e32 v0, 31, v8 5817; GFX10-NEXT: v_or_b32_e32 v1, v9, v5 5818; GFX10-NEXT: v_or_b32_e32 v3, v0, v3 5819; GFX10-NEXT: v_mov_b32_e32 v0, v4 5820; GFX10-NEXT: s_setpc_b64 s[30:31] 5821 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 65) 5822 ret i128 %result 5823} 5824 5825define amdgpu_ps <2 x i128> @s_fshr_v2i128(<2 x i128> inreg %lhs, <2 x i128> inreg %rhs, <2 x i128> inreg %amt) { 5826; GFX6-LABEL: s_fshr_v2i128: 5827; GFX6: ; %bb.0: 5828; GFX6-NEXT: s_movk_i32 s18, 0x7f 5829; GFX6-NEXT: s_mov_b32 s19, 0 5830; GFX6-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19] 5831; GFX6-NEXT: s_andn2_b64 s[16:17], s[18:19], s[16:17] 5832; GFX6-NEXT: s_lshl_b64 s[24:25], s[0:1], 1 5833; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 5834; GFX6-NEXT: s_lshr_b32 s0, s1, 31 5835; GFX6-NEXT: s_mov_b32 s1, s19 5836; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] 5837; GFX6-NEXT: s_sub_i32 s23, s16, 
64 5838; GFX6-NEXT: s_sub_i32 s17, 64, s16 5839; GFX6-NEXT: s_cmp_lt_u32 s16, 64 5840; GFX6-NEXT: s_cselect_b32 s28, 1, 0 5841; GFX6-NEXT: s_cmp_eq_u32 s16, 0 5842; GFX6-NEXT: s_cselect_b32 s29, 1, 0 5843; GFX6-NEXT: s_lshl_b64 s[2:3], s[24:25], s16 5844; GFX6-NEXT: s_lshr_b64 s[26:27], s[24:25], s17 5845; GFX6-NEXT: s_lshl_b64 s[16:17], s[0:1], s16 5846; GFX6-NEXT: s_or_b64 s[16:17], s[26:27], s[16:17] 5847; GFX6-NEXT: s_lshl_b64 s[24:25], s[24:25], s23 5848; GFX6-NEXT: s_cmp_lg_u32 s28, 0 5849; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 5850; GFX6-NEXT: s_cselect_b64 s[16:17], s[16:17], s[24:25] 5851; GFX6-NEXT: s_cmp_lg_u32 s29, 0 5852; GFX6-NEXT: s_cselect_b64 s[16:17], s[0:1], s[16:17] 5853; GFX6-NEXT: s_sub_i32 s26, s22, 64 5854; GFX6-NEXT: s_sub_i32 s24, 64, s22 5855; GFX6-NEXT: s_cmp_lt_u32 s22, 64 5856; GFX6-NEXT: s_cselect_b32 s27, 1, 0 5857; GFX6-NEXT: s_cmp_eq_u32 s22, 0 5858; GFX6-NEXT: s_cselect_b32 s28, 1, 0 5859; GFX6-NEXT: s_lshr_b64 s[0:1], s[10:11], s22 5860; GFX6-NEXT: s_lshr_b64 s[22:23], s[8:9], s22 5861; GFX6-NEXT: s_lshl_b64 s[24:25], s[10:11], s24 5862; GFX6-NEXT: s_or_b64 s[22:23], s[22:23], s[24:25] 5863; GFX6-NEXT: s_lshr_b64 s[10:11], s[10:11], s26 5864; GFX6-NEXT: s_cmp_lg_u32 s27, 0 5865; GFX6-NEXT: s_cselect_b64 s[10:11], s[22:23], s[10:11] 5866; GFX6-NEXT: s_cmp_lg_u32 s28, 0 5867; GFX6-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11] 5868; GFX6-NEXT: s_cmp_lg_u32 s27, 0 5869; GFX6-NEXT: s_cselect_b64 s[10:11], s[0:1], 0 5870; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9] 5871; GFX6-NEXT: s_or_b64 s[2:3], s[16:17], s[10:11] 5872; GFX6-NEXT: s_and_b64 s[8:9], s[20:21], s[18:19] 5873; GFX6-NEXT: s_andn2_b64 s[10:11], s[18:19], s[20:21] 5874; GFX6-NEXT: s_lshl_b64 s[6:7], s[6:7], 1 5875; GFX6-NEXT: s_lshr_b32 s18, s5, 31 5876; GFX6-NEXT: s_lshl_b64 s[16:17], s[4:5], 1 5877; GFX6-NEXT: s_or_b64 s[4:5], s[6:7], s[18:19] 5878; GFX6-NEXT: s_sub_i32 s9, s10, 64 5879; GFX6-NEXT: s_sub_i32 s11, 64, s10 5880; GFX6-NEXT: s_cmp_lt_u32 s10, 64 5881; 
GFX6-NEXT: s_cselect_b32 s20, 1, 0 5882; GFX6-NEXT: s_cmp_eq_u32 s10, 0 5883; GFX6-NEXT: s_cselect_b32 s21, 1, 0 5884; GFX6-NEXT: s_lshl_b64 s[6:7], s[16:17], s10 5885; GFX6-NEXT: s_lshr_b64 s[18:19], s[16:17], s11 5886; GFX6-NEXT: s_lshl_b64 s[10:11], s[4:5], s10 5887; GFX6-NEXT: s_or_b64 s[10:11], s[18:19], s[10:11] 5888; GFX6-NEXT: s_lshl_b64 s[16:17], s[16:17], s9 5889; GFX6-NEXT: s_cmp_lg_u32 s20, 0 5890; GFX6-NEXT: s_cselect_b64 s[6:7], s[6:7], 0 5891; GFX6-NEXT: s_cselect_b64 s[10:11], s[10:11], s[16:17] 5892; GFX6-NEXT: s_cmp_lg_u32 s21, 0 5893; GFX6-NEXT: s_cselect_b64 s[10:11], s[4:5], s[10:11] 5894; GFX6-NEXT: s_sub_i32 s18, s8, 64 5895; GFX6-NEXT: s_sub_i32 s16, 64, s8 5896; GFX6-NEXT: s_cmp_lt_u32 s8, 64 5897; GFX6-NEXT: s_cselect_b32 s19, 1, 0 5898; GFX6-NEXT: s_cmp_eq_u32 s8, 0 5899; GFX6-NEXT: s_cselect_b32 s20, 1, 0 5900; GFX6-NEXT: s_lshr_b64 s[4:5], s[14:15], s8 5901; GFX6-NEXT: s_lshr_b64 s[8:9], s[12:13], s8 5902; GFX6-NEXT: s_lshl_b64 s[16:17], s[14:15], s16 5903; GFX6-NEXT: s_or_b64 s[8:9], s[8:9], s[16:17] 5904; GFX6-NEXT: s_lshr_b64 s[14:15], s[14:15], s18 5905; GFX6-NEXT: s_cmp_lg_u32 s19, 0 5906; GFX6-NEXT: s_cselect_b64 s[8:9], s[8:9], s[14:15] 5907; GFX6-NEXT: s_cmp_lg_u32 s20, 0 5908; GFX6-NEXT: s_cselect_b64 s[8:9], s[12:13], s[8:9] 5909; GFX6-NEXT: s_cmp_lg_u32 s19, 0 5910; GFX6-NEXT: s_cselect_b64 s[12:13], s[4:5], 0 5911; GFX6-NEXT: s_or_b64 s[4:5], s[6:7], s[8:9] 5912; GFX6-NEXT: s_or_b64 s[6:7], s[10:11], s[12:13] 5913; GFX6-NEXT: ; return to shader part epilog 5914; 5915; GFX8-LABEL: s_fshr_v2i128: 5916; GFX8: ; %bb.0: 5917; GFX8-NEXT: s_movk_i32 s18, 0x7f 5918; GFX8-NEXT: s_mov_b32 s19, 0 5919; GFX8-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19] 5920; GFX8-NEXT: s_andn2_b64 s[16:17], s[18:19], s[16:17] 5921; GFX8-NEXT: s_lshl_b64 s[24:25], s[0:1], 1 5922; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 5923; GFX8-NEXT: s_lshr_b32 s0, s1, 31 5924; GFX8-NEXT: s_mov_b32 s1, s19 5925; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] 5926; 
GFX8-NEXT: s_sub_i32 s23, s16, 64 5927; GFX8-NEXT: s_sub_i32 s17, 64, s16 5928; GFX8-NEXT: s_cmp_lt_u32 s16, 64 5929; GFX8-NEXT: s_cselect_b32 s28, 1, 0 5930; GFX8-NEXT: s_cmp_eq_u32 s16, 0 5931; GFX8-NEXT: s_cselect_b32 s29, 1, 0 5932; GFX8-NEXT: s_lshl_b64 s[2:3], s[24:25], s16 5933; GFX8-NEXT: s_lshr_b64 s[26:27], s[24:25], s17 5934; GFX8-NEXT: s_lshl_b64 s[16:17], s[0:1], s16 5935; GFX8-NEXT: s_or_b64 s[16:17], s[26:27], s[16:17] 5936; GFX8-NEXT: s_lshl_b64 s[24:25], s[24:25], s23 5937; GFX8-NEXT: s_cmp_lg_u32 s28, 0 5938; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 5939; GFX8-NEXT: s_cselect_b64 s[16:17], s[16:17], s[24:25] 5940; GFX8-NEXT: s_cmp_lg_u32 s29, 0 5941; GFX8-NEXT: s_cselect_b64 s[16:17], s[0:1], s[16:17] 5942; GFX8-NEXT: s_sub_i32 s26, s22, 64 5943; GFX8-NEXT: s_sub_i32 s24, 64, s22 5944; GFX8-NEXT: s_cmp_lt_u32 s22, 64 5945; GFX8-NEXT: s_cselect_b32 s27, 1, 0 5946; GFX8-NEXT: s_cmp_eq_u32 s22, 0 5947; GFX8-NEXT: s_cselect_b32 s28, 1, 0 5948; GFX8-NEXT: s_lshr_b64 s[0:1], s[10:11], s22 5949; GFX8-NEXT: s_lshr_b64 s[22:23], s[8:9], s22 5950; GFX8-NEXT: s_lshl_b64 s[24:25], s[10:11], s24 5951; GFX8-NEXT: s_or_b64 s[22:23], s[22:23], s[24:25] 5952; GFX8-NEXT: s_lshr_b64 s[10:11], s[10:11], s26 5953; GFX8-NEXT: s_cmp_lg_u32 s27, 0 5954; GFX8-NEXT: s_cselect_b64 s[10:11], s[22:23], s[10:11] 5955; GFX8-NEXT: s_cmp_lg_u32 s28, 0 5956; GFX8-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11] 5957; GFX8-NEXT: s_cmp_lg_u32 s27, 0 5958; GFX8-NEXT: s_cselect_b64 s[10:11], s[0:1], 0 5959; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9] 5960; GFX8-NEXT: s_or_b64 s[2:3], s[16:17], s[10:11] 5961; GFX8-NEXT: s_and_b64 s[8:9], s[20:21], s[18:19] 5962; GFX8-NEXT: s_andn2_b64 s[10:11], s[18:19], s[20:21] 5963; GFX8-NEXT: s_lshl_b64 s[6:7], s[6:7], 1 5964; GFX8-NEXT: s_lshr_b32 s18, s5, 31 5965; GFX8-NEXT: s_lshl_b64 s[16:17], s[4:5], 1 5966; GFX8-NEXT: s_or_b64 s[4:5], s[6:7], s[18:19] 5967; GFX8-NEXT: s_sub_i32 s9, s10, 64 5968; GFX8-NEXT: s_sub_i32 s11, 64, s10 5969; GFX8-NEXT: 
s_cmp_lt_u32 s10, 64 5970; GFX8-NEXT: s_cselect_b32 s20, 1, 0 5971; GFX8-NEXT: s_cmp_eq_u32 s10, 0 5972; GFX8-NEXT: s_cselect_b32 s21, 1, 0 5973; GFX8-NEXT: s_lshl_b64 s[6:7], s[16:17], s10 5974; GFX8-NEXT: s_lshr_b64 s[18:19], s[16:17], s11 5975; GFX8-NEXT: s_lshl_b64 s[10:11], s[4:5], s10 5976; GFX8-NEXT: s_or_b64 s[10:11], s[18:19], s[10:11] 5977; GFX8-NEXT: s_lshl_b64 s[16:17], s[16:17], s9 5978; GFX8-NEXT: s_cmp_lg_u32 s20, 0 5979; GFX8-NEXT: s_cselect_b64 s[6:7], s[6:7], 0 5980; GFX8-NEXT: s_cselect_b64 s[10:11], s[10:11], s[16:17] 5981; GFX8-NEXT: s_cmp_lg_u32 s21, 0 5982; GFX8-NEXT: s_cselect_b64 s[10:11], s[4:5], s[10:11] 5983; GFX8-NEXT: s_sub_i32 s18, s8, 64 5984; GFX8-NEXT: s_sub_i32 s16, 64, s8 5985; GFX8-NEXT: s_cmp_lt_u32 s8, 64 5986; GFX8-NEXT: s_cselect_b32 s19, 1, 0 5987; GFX8-NEXT: s_cmp_eq_u32 s8, 0 5988; GFX8-NEXT: s_cselect_b32 s20, 1, 0 5989; GFX8-NEXT: s_lshr_b64 s[4:5], s[14:15], s8 5990; GFX8-NEXT: s_lshr_b64 s[8:9], s[12:13], s8 5991; GFX8-NEXT: s_lshl_b64 s[16:17], s[14:15], s16 5992; GFX8-NEXT: s_or_b64 s[8:9], s[8:9], s[16:17] 5993; GFX8-NEXT: s_lshr_b64 s[14:15], s[14:15], s18 5994; GFX8-NEXT: s_cmp_lg_u32 s19, 0 5995; GFX8-NEXT: s_cselect_b64 s[8:9], s[8:9], s[14:15] 5996; GFX8-NEXT: s_cmp_lg_u32 s20, 0 5997; GFX8-NEXT: s_cselect_b64 s[8:9], s[12:13], s[8:9] 5998; GFX8-NEXT: s_cmp_lg_u32 s19, 0 5999; GFX8-NEXT: s_cselect_b64 s[12:13], s[4:5], 0 6000; GFX8-NEXT: s_or_b64 s[4:5], s[6:7], s[8:9] 6001; GFX8-NEXT: s_or_b64 s[6:7], s[10:11], s[12:13] 6002; GFX8-NEXT: ; return to shader part epilog 6003; 6004; GFX9-LABEL: s_fshr_v2i128: 6005; GFX9: ; %bb.0: 6006; GFX9-NEXT: s_movk_i32 s18, 0x7f 6007; GFX9-NEXT: s_mov_b32 s19, 0 6008; GFX9-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19] 6009; GFX9-NEXT: s_andn2_b64 s[16:17], s[18:19], s[16:17] 6010; GFX9-NEXT: s_lshl_b64 s[24:25], s[0:1], 1 6011; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 6012; GFX9-NEXT: s_lshr_b32 s0, s1, 31 6013; GFX9-NEXT: s_mov_b32 s1, s19 6014; GFX9-NEXT: s_or_b64 s[0:1], 
s[2:3], s[0:1] 6015; GFX9-NEXT: s_sub_i32 s23, s16, 64 6016; GFX9-NEXT: s_sub_i32 s17, 64, s16 6017; GFX9-NEXT: s_cmp_lt_u32 s16, 64 6018; GFX9-NEXT: s_cselect_b32 s28, 1, 0 6019; GFX9-NEXT: s_cmp_eq_u32 s16, 0 6020; GFX9-NEXT: s_cselect_b32 s29, 1, 0 6021; GFX9-NEXT: s_lshl_b64 s[2:3], s[24:25], s16 6022; GFX9-NEXT: s_lshr_b64 s[26:27], s[24:25], s17 6023; GFX9-NEXT: s_lshl_b64 s[16:17], s[0:1], s16 6024; GFX9-NEXT: s_or_b64 s[16:17], s[26:27], s[16:17] 6025; GFX9-NEXT: s_lshl_b64 s[24:25], s[24:25], s23 6026; GFX9-NEXT: s_cmp_lg_u32 s28, 0 6027; GFX9-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 6028; GFX9-NEXT: s_cselect_b64 s[16:17], s[16:17], s[24:25] 6029; GFX9-NEXT: s_cmp_lg_u32 s29, 0 6030; GFX9-NEXT: s_cselect_b64 s[16:17], s[0:1], s[16:17] 6031; GFX9-NEXT: s_sub_i32 s26, s22, 64 6032; GFX9-NEXT: s_sub_i32 s24, 64, s22 6033; GFX9-NEXT: s_cmp_lt_u32 s22, 64 6034; GFX9-NEXT: s_cselect_b32 s27, 1, 0 6035; GFX9-NEXT: s_cmp_eq_u32 s22, 0 6036; GFX9-NEXT: s_cselect_b32 s28, 1, 0 6037; GFX9-NEXT: s_lshr_b64 s[0:1], s[10:11], s22 6038; GFX9-NEXT: s_lshr_b64 s[22:23], s[8:9], s22 6039; GFX9-NEXT: s_lshl_b64 s[24:25], s[10:11], s24 6040; GFX9-NEXT: s_or_b64 s[22:23], s[22:23], s[24:25] 6041; GFX9-NEXT: s_lshr_b64 s[10:11], s[10:11], s26 6042; GFX9-NEXT: s_cmp_lg_u32 s27, 0 6043; GFX9-NEXT: s_cselect_b64 s[10:11], s[22:23], s[10:11] 6044; GFX9-NEXT: s_cmp_lg_u32 s28, 0 6045; GFX9-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11] 6046; GFX9-NEXT: s_cmp_lg_u32 s27, 0 6047; GFX9-NEXT: s_cselect_b64 s[10:11], s[0:1], 0 6048; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9] 6049; GFX9-NEXT: s_or_b64 s[2:3], s[16:17], s[10:11] 6050; GFX9-NEXT: s_and_b64 s[8:9], s[20:21], s[18:19] 6051; GFX9-NEXT: s_andn2_b64 s[10:11], s[18:19], s[20:21] 6052; GFX9-NEXT: s_lshl_b64 s[6:7], s[6:7], 1 6053; GFX9-NEXT: s_lshr_b32 s18, s5, 31 6054; GFX9-NEXT: s_lshl_b64 s[16:17], s[4:5], 1 6055; GFX9-NEXT: s_or_b64 s[4:5], s[6:7], s[18:19] 6056; GFX9-NEXT: s_sub_i32 s9, s10, 64 6057; GFX9-NEXT: s_sub_i32 s11, 64, 
s10 6058; GFX9-NEXT: s_cmp_lt_u32 s10, 64 6059; GFX9-NEXT: s_cselect_b32 s20, 1, 0 6060; GFX9-NEXT: s_cmp_eq_u32 s10, 0 6061; GFX9-NEXT: s_cselect_b32 s21, 1, 0 6062; GFX9-NEXT: s_lshl_b64 s[6:7], s[16:17], s10 6063; GFX9-NEXT: s_lshr_b64 s[18:19], s[16:17], s11 6064; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], s10 6065; GFX9-NEXT: s_or_b64 s[10:11], s[18:19], s[10:11] 6066; GFX9-NEXT: s_lshl_b64 s[16:17], s[16:17], s9 6067; GFX9-NEXT: s_cmp_lg_u32 s20, 0 6068; GFX9-NEXT: s_cselect_b64 s[6:7], s[6:7], 0 6069; GFX9-NEXT: s_cselect_b64 s[10:11], s[10:11], s[16:17] 6070; GFX9-NEXT: s_cmp_lg_u32 s21, 0 6071; GFX9-NEXT: s_cselect_b64 s[10:11], s[4:5], s[10:11] 6072; GFX9-NEXT: s_sub_i32 s18, s8, 64 6073; GFX9-NEXT: s_sub_i32 s16, 64, s8 6074; GFX9-NEXT: s_cmp_lt_u32 s8, 64 6075; GFX9-NEXT: s_cselect_b32 s19, 1, 0 6076; GFX9-NEXT: s_cmp_eq_u32 s8, 0 6077; GFX9-NEXT: s_cselect_b32 s20, 1, 0 6078; GFX9-NEXT: s_lshr_b64 s[4:5], s[14:15], s8 6079; GFX9-NEXT: s_lshr_b64 s[8:9], s[12:13], s8 6080; GFX9-NEXT: s_lshl_b64 s[16:17], s[14:15], s16 6081; GFX9-NEXT: s_or_b64 s[8:9], s[8:9], s[16:17] 6082; GFX9-NEXT: s_lshr_b64 s[14:15], s[14:15], s18 6083; GFX9-NEXT: s_cmp_lg_u32 s19, 0 6084; GFX9-NEXT: s_cselect_b64 s[8:9], s[8:9], s[14:15] 6085; GFX9-NEXT: s_cmp_lg_u32 s20, 0 6086; GFX9-NEXT: s_cselect_b64 s[8:9], s[12:13], s[8:9] 6087; GFX9-NEXT: s_cmp_lg_u32 s19, 0 6088; GFX9-NEXT: s_cselect_b64 s[12:13], s[4:5], 0 6089; GFX9-NEXT: s_or_b64 s[4:5], s[6:7], s[8:9] 6090; GFX9-NEXT: s_or_b64 s[6:7], s[10:11], s[12:13] 6091; GFX9-NEXT: ; return to shader part epilog 6092; 6093; GFX10-LABEL: s_fshr_v2i128: 6094; GFX10: ; %bb.0: 6095; GFX10-NEXT: s_movk_i32 s18, 0x7f 6096; GFX10-NEXT: s_mov_b32 s19, 0 6097; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 6098; GFX10-NEXT: s_and_b64 s[22:23], s[16:17], s[18:19] 6099; GFX10-NEXT: s_andn2_b64 s[16:17], s[18:19], s[16:17] 6100; GFX10-NEXT: s_lshr_b32 s24, s1, 31 6101; GFX10-NEXT: s_mov_b32 s25, s19 6102; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 6103; 
GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[24:25] 6104; GFX10-NEXT: s_sub_i32 s23, s16, 64 6105; GFX10-NEXT: s_sub_i32 s17, 64, s16 6106; GFX10-NEXT: s_cmp_lt_u32 s16, 64 6107; GFX10-NEXT: s_cselect_b32 s28, 1, 0 6108; GFX10-NEXT: s_cmp_eq_u32 s16, 0 6109; GFX10-NEXT: s_cselect_b32 s29, 1, 0 6110; GFX10-NEXT: s_lshr_b64 s[24:25], s[0:1], s17 6111; GFX10-NEXT: s_lshl_b64 s[26:27], s[2:3], s16 6112; GFX10-NEXT: s_lshl_b64 s[16:17], s[0:1], s16 6113; GFX10-NEXT: s_or_b64 s[24:25], s[24:25], s[26:27] 6114; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s23 6115; GFX10-NEXT: s_cmp_lg_u32 s28, 0 6116; GFX10-NEXT: s_cselect_b64 s[16:17], s[16:17], 0 6117; GFX10-NEXT: s_cselect_b64 s[0:1], s[24:25], s[0:1] 6118; GFX10-NEXT: s_cmp_lg_u32 s29, 0 6119; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] 6120; GFX10-NEXT: s_sub_i32 s26, s22, 64 6121; GFX10-NEXT: s_sub_i32 s23, 64, s22 6122; GFX10-NEXT: s_cmp_lt_u32 s22, 64 6123; GFX10-NEXT: s_cselect_b32 s27, 1, 0 6124; GFX10-NEXT: s_cmp_eq_u32 s22, 0 6125; GFX10-NEXT: s_cselect_b32 s28, 1, 0 6126; GFX10-NEXT: s_lshr_b64 s[0:1], s[8:9], s22 6127; GFX10-NEXT: s_lshl_b64 s[24:25], s[10:11], s23 6128; GFX10-NEXT: s_lshr_b64 s[22:23], s[10:11], s22 6129; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[24:25] 6130; GFX10-NEXT: s_lshr_b64 s[10:11], s[10:11], s26 6131; GFX10-NEXT: s_cmp_lg_u32 s27, 0 6132; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[10:11] 6133; GFX10-NEXT: s_cmp_lg_u32 s28, 0 6134; GFX10-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1] 6135; GFX10-NEXT: s_cmp_lg_u32 s27, 0 6136; GFX10-NEXT: s_cselect_b64 s[8:9], s[22:23], 0 6137; GFX10-NEXT: s_andn2_b64 s[10:11], s[18:19], s[20:21] 6138; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9] 6139; GFX10-NEXT: s_and_b64 s[8:9], s[20:21], s[18:19] 6140; GFX10-NEXT: s_lshl_b64 s[6:7], s[6:7], 1 6141; GFX10-NEXT: s_lshr_b32 s18, s5, 31 6142; GFX10-NEXT: s_or_b64 s[0:1], s[16:17], s[0:1] 6143; GFX10-NEXT: s_lshl_b64 s[4:5], s[4:5], 1 6144; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[18:19] 6145; GFX10-NEXT: 
s_sub_i32 s9, s10, 64 6146; GFX10-NEXT: s_sub_i32 s11, 64, s10 6147; GFX10-NEXT: s_cmp_lt_u32 s10, 64 6148; GFX10-NEXT: s_cselect_b32 s20, 1, 0 6149; GFX10-NEXT: s_cmp_eq_u32 s10, 0 6150; GFX10-NEXT: s_cselect_b32 s21, 1, 0 6151; GFX10-NEXT: s_lshr_b64 s[16:17], s[4:5], s11 6152; GFX10-NEXT: s_lshl_b64 s[18:19], s[6:7], s10 6153; GFX10-NEXT: s_lshl_b64 s[10:11], s[4:5], s10 6154; GFX10-NEXT: s_or_b64 s[16:17], s[16:17], s[18:19] 6155; GFX10-NEXT: s_lshl_b64 s[4:5], s[4:5], s9 6156; GFX10-NEXT: s_cmp_lg_u32 s20, 0 6157; GFX10-NEXT: s_cselect_b64 s[10:11], s[10:11], 0 6158; GFX10-NEXT: s_cselect_b64 s[4:5], s[16:17], s[4:5] 6159; GFX10-NEXT: s_cmp_lg_u32 s21, 0 6160; GFX10-NEXT: s_cselect_b64 s[6:7], s[6:7], s[4:5] 6161; GFX10-NEXT: s_sub_i32 s18, s8, 64 6162; GFX10-NEXT: s_sub_i32 s9, 64, s8 6163; GFX10-NEXT: s_cmp_lt_u32 s8, 64 6164; GFX10-NEXT: s_cselect_b32 s19, 1, 0 6165; GFX10-NEXT: s_cmp_eq_u32 s8, 0 6166; GFX10-NEXT: s_cselect_b32 s20, 1, 0 6167; GFX10-NEXT: s_lshr_b64 s[4:5], s[12:13], s8 6168; GFX10-NEXT: s_lshl_b64 s[16:17], s[14:15], s9 6169; GFX10-NEXT: s_lshr_b64 s[8:9], s[14:15], s8 6170; GFX10-NEXT: s_or_b64 s[4:5], s[4:5], s[16:17] 6171; GFX10-NEXT: s_lshr_b64 s[14:15], s[14:15], s18 6172; GFX10-NEXT: s_cmp_lg_u32 s19, 0 6173; GFX10-NEXT: s_cselect_b64 s[4:5], s[4:5], s[14:15] 6174; GFX10-NEXT: s_cmp_lg_u32 s20, 0 6175; GFX10-NEXT: s_cselect_b64 s[4:5], s[12:13], s[4:5] 6176; GFX10-NEXT: s_cmp_lg_u32 s19, 0 6177; GFX10-NEXT: s_cselect_b64 s[8:9], s[8:9], 0 6178; GFX10-NEXT: s_or_b64 s[4:5], s[10:11], s[4:5] 6179; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] 6180; GFX10-NEXT: ; return to shader part epilog 6181 %result = call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt) 6182 ret <2 x i128> %result 6183} 6184 6185define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt) { 6186; GFX6-LABEL: v_fshr_v2i128: 6187; GFX6: ; %bb.0: 6188; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6189; 
GFX6-NEXT: s_movk_i32 s6, 0x7f 6190; GFX6-NEXT: v_xor_b32_e32 v17, -1, v16 6191; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 6192; GFX6-NEXT: v_and_b32_e32 v23, s6, v17 6193; GFX6-NEXT: v_lshrrev_b32_e32 v17, 31, v1 6194; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 1 6195; GFX6-NEXT: v_or_b32_e32 v2, v2, v17 6196; GFX6-NEXT: v_sub_i32_e32 v17, vcc, 64, v23 6197; GFX6-NEXT: v_lshr_b64 v[17:18], v[0:1], v17 6198; GFX6-NEXT: v_lshl_b64 v[21:22], v[2:3], v23 6199; GFX6-NEXT: v_and_b32_e32 v24, s6, v16 6200; GFX6-NEXT: v_sub_i32_e32 v16, vcc, 64, v24 6201; GFX6-NEXT: v_or_b32_e32 v21, v17, v21 6202; GFX6-NEXT: v_or_b32_e32 v22, v18, v22 6203; GFX6-NEXT: v_lshl_b64 v[16:17], v[10:11], v16 6204; GFX6-NEXT: v_lshr_b64 v[18:19], v[8:9], v24 6205; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v23 6206; GFX6-NEXT: v_or_b32_e32 v18, v18, v16 6207; GFX6-NEXT: v_subrev_i32_e32 v16, vcc, 64, v23 6208; GFX6-NEXT: v_or_b32_e32 v19, v19, v17 6209; GFX6-NEXT: v_lshl_b64 v[16:17], v[0:1], v16 6210; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v23 6211; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v23 6212; GFX6-NEXT: v_cndmask_b32_e32 v25, 0, v0, vcc 6213; GFX6-NEXT: v_cndmask_b32_e32 v0, v16, v21, vcc 6214; GFX6-NEXT: v_cndmask_b32_e32 v16, v17, v22, vcc 6215; GFX6-NEXT: v_cndmask_b32_e64 v17, v0, v2, s[4:5] 6216; GFX6-NEXT: v_cndmask_b32_e64 v16, v16, v3, s[4:5] 6217; GFX6-NEXT: v_subrev_i32_e64 v0, s[4:5], 64, v24 6218; GFX6-NEXT: v_lshr_b64 v[2:3], v[10:11], v0 6219; GFX6-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v24 6220; GFX6-NEXT: v_cndmask_b32_e64 v2, v2, v18, s[4:5] 6221; GFX6-NEXT: v_cndmask_b32_e32 v18, 0, v1, vcc 6222; GFX6-NEXT: v_lshr_b64 v[0:1], v[10:11], v24 6223; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v24 6224; GFX6-NEXT: v_cndmask_b32_e64 v3, v3, v19, s[4:5] 6225; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc 6226; GFX6-NEXT: v_cndmask_b32_e64 v8, 0, v0, s[4:5] 6227; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc 6228; GFX6-NEXT: v_cndmask_b32_e64 v9, 0, v1, s[4:5] 6229; GFX6-NEXT: v_or_b32_e32 v0, v25, v2 
6230; GFX6-NEXT: v_or_b32_e32 v2, v17, v8 6231; GFX6-NEXT: v_xor_b32_e32 v8, -1, v20 6232; GFX6-NEXT: v_lshl_b64 v[6:7], v[6:7], 1 6233; GFX6-NEXT: v_or_b32_e32 v1, v18, v3 6234; GFX6-NEXT: v_or_b32_e32 v3, v16, v9 6235; GFX6-NEXT: v_and_b32_e32 v17, s6, v8 6236; GFX6-NEXT: v_lshl_b64 v[8:9], v[4:5], 1 6237; GFX6-NEXT: v_lshrrev_b32_e32 v4, 31, v5 6238; GFX6-NEXT: v_or_b32_e32 v6, v6, v4 6239; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 64, v17 6240; GFX6-NEXT: v_lshr_b64 v[4:5], v[8:9], v4 6241; GFX6-NEXT: v_lshl_b64 v[10:11], v[6:7], v17 6242; GFX6-NEXT: v_subrev_i32_e32 v18, vcc, 64, v17 6243; GFX6-NEXT: v_or_b32_e32 v10, v4, v10 6244; GFX6-NEXT: v_or_b32_e32 v11, v5, v11 6245; GFX6-NEXT: v_lshl_b64 v[4:5], v[8:9], v17 6246; GFX6-NEXT: v_lshl_b64 v[8:9], v[8:9], v18 6247; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v17 6248; GFX6-NEXT: v_and_b32_e32 v16, s6, v20 6249; GFX6-NEXT: v_cndmask_b32_e32 v18, 0, v4, vcc 6250; GFX6-NEXT: v_cndmask_b32_e32 v19, 0, v5, vcc 6251; GFX6-NEXT: v_cndmask_b32_e32 v4, v8, v10, vcc 6252; GFX6-NEXT: v_cndmask_b32_e32 v5, v9, v11, vcc 6253; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v17 6254; GFX6-NEXT: v_cndmask_b32_e32 v8, v4, v6, vcc 6255; GFX6-NEXT: v_cndmask_b32_e32 v9, v5, v7, vcc 6256; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 64, v16 6257; GFX6-NEXT: v_lshr_b64 v[4:5], v[12:13], v16 6258; GFX6-NEXT: v_lshl_b64 v[6:7], v[14:15], v6 6259; GFX6-NEXT: v_subrev_i32_e32 v10, vcc, 64, v16 6260; GFX6-NEXT: v_or_b32_e32 v11, v4, v6 6261; GFX6-NEXT: v_or_b32_e32 v17, v5, v7 6262; GFX6-NEXT: v_lshr_b64 v[6:7], v[14:15], v10 6263; GFX6-NEXT: v_lshr_b64 v[4:5], v[14:15], v16 6264; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v16 6265; GFX6-NEXT: v_cndmask_b32_e32 v6, v6, v11, vcc 6266; GFX6-NEXT: v_cndmask_b32_e32 v7, v7, v17, vcc 6267; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v16 6268; GFX6-NEXT: v_cndmask_b32_e64 v6, v6, v12, s[4:5] 6269; GFX6-NEXT: v_cndmask_b32_e64 v7, v7, v13, s[4:5] 6270; GFX6-NEXT: v_cndmask_b32_e32 v10, 0, v4, vcc 6271; GFX6-NEXT: v_cndmask_b32_e32 v11, 
0, v5, vcc 6272; GFX6-NEXT: v_or_b32_e32 v4, v18, v6 6273; GFX6-NEXT: v_or_b32_e32 v5, v19, v7 6274; GFX6-NEXT: v_or_b32_e32 v6, v8, v10 6275; GFX6-NEXT: v_or_b32_e32 v7, v9, v11 6276; GFX6-NEXT: s_setpc_b64 s[30:31] 6277; 6278; GFX8-LABEL: v_fshr_v2i128: 6279; GFX8: ; %bb.0: 6280; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6281; GFX8-NEXT: s_movk_i32 s6, 0x7f 6282; GFX8-NEXT: v_xor_b32_e32 v17, -1, v16 6283; GFX8-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 6284; GFX8-NEXT: v_and_b32_e32 v23, s6, v17 6285; GFX8-NEXT: v_lshrrev_b32_e32 v17, 31, v1 6286; GFX8-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 6287; GFX8-NEXT: v_or_b32_e32 v2, v2, v17 6288; GFX8-NEXT: v_sub_u32_e32 v17, vcc, 64, v23 6289; GFX8-NEXT: v_lshrrev_b64 v[17:18], v17, v[0:1] 6290; GFX8-NEXT: v_lshlrev_b64 v[21:22], v23, v[2:3] 6291; GFX8-NEXT: v_and_b32_e32 v24, s6, v16 6292; GFX8-NEXT: v_sub_u32_e32 v16, vcc, 64, v24 6293; GFX8-NEXT: v_or_b32_e32 v21, v17, v21 6294; GFX8-NEXT: v_or_b32_e32 v22, v18, v22 6295; GFX8-NEXT: v_lshlrev_b64 v[16:17], v16, v[10:11] 6296; GFX8-NEXT: v_lshrrev_b64 v[18:19], v24, v[8:9] 6297; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v23 6298; GFX8-NEXT: v_or_b32_e32 v18, v18, v16 6299; GFX8-NEXT: v_subrev_u32_e32 v16, vcc, 64, v23 6300; GFX8-NEXT: v_or_b32_e32 v19, v19, v17 6301; GFX8-NEXT: v_lshlrev_b64 v[16:17], v16, v[0:1] 6302; GFX8-NEXT: v_lshlrev_b64 v[0:1], v23, v[0:1] 6303; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v23 6304; GFX8-NEXT: v_cndmask_b32_e32 v25, 0, v0, vcc 6305; GFX8-NEXT: v_cndmask_b32_e32 v0, v16, v21, vcc 6306; GFX8-NEXT: v_cndmask_b32_e32 v16, v17, v22, vcc 6307; GFX8-NEXT: v_cndmask_b32_e64 v17, v0, v2, s[4:5] 6308; GFX8-NEXT: v_cndmask_b32_e64 v16, v16, v3, s[4:5] 6309; GFX8-NEXT: v_subrev_u32_e64 v0, s[4:5], 64, v24 6310; GFX8-NEXT: v_lshrrev_b64 v[2:3], v0, v[10:11] 6311; GFX8-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v24 6312; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v18, s[4:5] 6313; GFX8-NEXT: v_cndmask_b32_e32 v18, 0, v1, vcc 6314; GFX8-NEXT: v_lshrrev_b64 v[0:1], 
v24, v[10:11] 6315; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v24 6316; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v19, s[4:5] 6317; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc 6318; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, v0, s[4:5] 6319; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc 6320; GFX8-NEXT: v_cndmask_b32_e64 v9, 0, v1, s[4:5] 6321; GFX8-NEXT: v_or_b32_e32 v0, v25, v2 6322; GFX8-NEXT: v_or_b32_e32 v2, v17, v8 6323; GFX8-NEXT: v_xor_b32_e32 v8, -1, v20 6324; GFX8-NEXT: v_lshlrev_b64 v[6:7], 1, v[6:7] 6325; GFX8-NEXT: v_or_b32_e32 v1, v18, v3 6326; GFX8-NEXT: v_or_b32_e32 v3, v16, v9 6327; GFX8-NEXT: v_and_b32_e32 v17, s6, v8 6328; GFX8-NEXT: v_lshlrev_b64 v[8:9], 1, v[4:5] 6329; GFX8-NEXT: v_lshrrev_b32_e32 v4, 31, v5 6330; GFX8-NEXT: v_or_b32_e32 v6, v6, v4 6331; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 64, v17 6332; GFX8-NEXT: v_lshrrev_b64 v[4:5], v4, v[8:9] 6333; GFX8-NEXT: v_lshlrev_b64 v[10:11], v17, v[6:7] 6334; GFX8-NEXT: v_subrev_u32_e32 v18, vcc, 64, v17 6335; GFX8-NEXT: v_or_b32_e32 v10, v4, v10 6336; GFX8-NEXT: v_or_b32_e32 v11, v5, v11 6337; GFX8-NEXT: v_lshlrev_b64 v[4:5], v17, v[8:9] 6338; GFX8-NEXT: v_lshlrev_b64 v[8:9], v18, v[8:9] 6339; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v17 6340; GFX8-NEXT: v_and_b32_e32 v16, s6, v20 6341; GFX8-NEXT: v_cndmask_b32_e32 v18, 0, v4, vcc 6342; GFX8-NEXT: v_cndmask_b32_e32 v19, 0, v5, vcc 6343; GFX8-NEXT: v_cndmask_b32_e32 v4, v8, v10, vcc 6344; GFX8-NEXT: v_cndmask_b32_e32 v5, v9, v11, vcc 6345; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v17 6346; GFX8-NEXT: v_cndmask_b32_e32 v8, v4, v6, vcc 6347; GFX8-NEXT: v_cndmask_b32_e32 v9, v5, v7, vcc 6348; GFX8-NEXT: v_sub_u32_e32 v6, vcc, 64, v16 6349; GFX8-NEXT: v_lshrrev_b64 v[4:5], v16, v[12:13] 6350; GFX8-NEXT: v_lshlrev_b64 v[6:7], v6, v[14:15] 6351; GFX8-NEXT: v_subrev_u32_e32 v10, vcc, 64, v16 6352; GFX8-NEXT: v_or_b32_e32 v11, v4, v6 6353; GFX8-NEXT: v_or_b32_e32 v17, v5, v7 6354; GFX8-NEXT: v_lshrrev_b64 v[6:7], v10, v[14:15] 6355; GFX8-NEXT: v_lshrrev_b64 v[4:5], v16, v[14:15] 6356; 
GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v16 6357; GFX8-NEXT: v_cndmask_b32_e32 v6, v6, v11, vcc 6358; GFX8-NEXT: v_cndmask_b32_e32 v7, v7, v17, vcc 6359; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v16 6360; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v12, s[4:5] 6361; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v13, s[4:5] 6362; GFX8-NEXT: v_cndmask_b32_e32 v10, 0, v4, vcc 6363; GFX8-NEXT: v_cndmask_b32_e32 v11, 0, v5, vcc 6364; GFX8-NEXT: v_or_b32_e32 v4, v18, v6 6365; GFX8-NEXT: v_or_b32_e32 v5, v19, v7 6366; GFX8-NEXT: v_or_b32_e32 v6, v8, v10 6367; GFX8-NEXT: v_or_b32_e32 v7, v9, v11 6368; GFX8-NEXT: s_setpc_b64 s[30:31] 6369; 6370; GFX9-LABEL: v_fshr_v2i128: 6371; GFX9: ; %bb.0: 6372; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6373; GFX9-NEXT: s_movk_i32 s6, 0x7f 6374; GFX9-NEXT: v_xor_b32_e32 v17, -1, v16 6375; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 6376; GFX9-NEXT: v_and_b32_e32 v23, s6, v17 6377; GFX9-NEXT: v_lshrrev_b32_e32 v17, 31, v1 6378; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 6379; GFX9-NEXT: v_or_b32_e32 v2, v2, v17 6380; GFX9-NEXT: v_sub_u32_e32 v17, 64, v23 6381; GFX9-NEXT: v_lshrrev_b64 v[17:18], v17, v[0:1] 6382; GFX9-NEXT: v_lshlrev_b64 v[21:22], v23, v[2:3] 6383; GFX9-NEXT: v_and_b32_e32 v24, s6, v16 6384; GFX9-NEXT: v_sub_u32_e32 v16, 64, v24 6385; GFX9-NEXT: v_or_b32_e32 v21, v17, v21 6386; GFX9-NEXT: v_or_b32_e32 v22, v18, v22 6387; GFX9-NEXT: v_lshlrev_b64 v[16:17], v16, v[10:11] 6388; GFX9-NEXT: v_lshrrev_b64 v[18:19], v24, v[8:9] 6389; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v23 6390; GFX9-NEXT: v_or_b32_e32 v18, v18, v16 6391; GFX9-NEXT: v_subrev_u32_e32 v16, 64, v23 6392; GFX9-NEXT: v_or_b32_e32 v19, v19, v17 6393; GFX9-NEXT: v_lshlrev_b64 v[16:17], v16, v[0:1] 6394; GFX9-NEXT: v_lshlrev_b64 v[0:1], v23, v[0:1] 6395; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v23 6396; GFX9-NEXT: v_cndmask_b32_e32 v25, 0, v0, vcc 6397; GFX9-NEXT: v_cndmask_b32_e32 v0, v16, v21, vcc 6398; GFX9-NEXT: v_cndmask_b32_e32 v16, v17, v22, vcc 6399; GFX9-NEXT: 
v_cndmask_b32_e64 v17, v0, v2, s[4:5] 6400; GFX9-NEXT: v_subrev_u32_e32 v0, 64, v24 6401; GFX9-NEXT: v_cndmask_b32_e64 v16, v16, v3, s[4:5] 6402; GFX9-NEXT: v_lshrrev_b64 v[2:3], v0, v[10:11] 6403; GFX9-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v24 6404; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v18, s[4:5] 6405; GFX9-NEXT: v_cndmask_b32_e32 v18, 0, v1, vcc 6406; GFX9-NEXT: v_lshrrev_b64 v[0:1], v24, v[10:11] 6407; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v24 6408; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v19, s[4:5] 6409; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc 6410; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, v0, s[4:5] 6411; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc 6412; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, v1, s[4:5] 6413; GFX9-NEXT: v_or_b32_e32 v0, v25, v2 6414; GFX9-NEXT: v_or_b32_e32 v2, v17, v8 6415; GFX9-NEXT: v_xor_b32_e32 v8, -1, v20 6416; GFX9-NEXT: v_lshlrev_b64 v[6:7], 1, v[6:7] 6417; GFX9-NEXT: v_or_b32_e32 v1, v18, v3 6418; GFX9-NEXT: v_or_b32_e32 v3, v16, v9 6419; GFX9-NEXT: v_and_b32_e32 v17, s6, v8 6420; GFX9-NEXT: v_lshlrev_b64 v[8:9], 1, v[4:5] 6421; GFX9-NEXT: v_lshrrev_b32_e32 v4, 31, v5 6422; GFX9-NEXT: v_or_b32_e32 v6, v6, v4 6423; GFX9-NEXT: v_sub_u32_e32 v4, 64, v17 6424; GFX9-NEXT: v_lshrrev_b64 v[4:5], v4, v[8:9] 6425; GFX9-NEXT: v_lshlrev_b64 v[10:11], v17, v[6:7] 6426; GFX9-NEXT: v_subrev_u32_e32 v18, 64, v17 6427; GFX9-NEXT: v_or_b32_e32 v10, v4, v10 6428; GFX9-NEXT: v_or_b32_e32 v11, v5, v11 6429; GFX9-NEXT: v_lshlrev_b64 v[4:5], v17, v[8:9] 6430; GFX9-NEXT: v_lshlrev_b64 v[8:9], v18, v[8:9] 6431; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v17 6432; GFX9-NEXT: v_and_b32_e32 v16, s6, v20 6433; GFX9-NEXT: v_cndmask_b32_e32 v18, 0, v4, vcc 6434; GFX9-NEXT: v_cndmask_b32_e32 v19, 0, v5, vcc 6435; GFX9-NEXT: v_cndmask_b32_e32 v4, v8, v10, vcc 6436; GFX9-NEXT: v_cndmask_b32_e32 v5, v9, v11, vcc 6437; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v17 6438; GFX9-NEXT: v_cndmask_b32_e32 v8, v4, v6, vcc 6439; GFX9-NEXT: v_sub_u32_e32 v6, 64, v16 6440; GFX9-NEXT: 
v_cndmask_b32_e32 v9, v5, v7, vcc 6441; GFX9-NEXT: v_lshrrev_b64 v[4:5], v16, v[12:13] 6442; GFX9-NEXT: v_lshlrev_b64 v[6:7], v6, v[14:15] 6443; GFX9-NEXT: v_subrev_u32_e32 v10, 64, v16 6444; GFX9-NEXT: v_or_b32_e32 v11, v4, v6 6445; GFX9-NEXT: v_or_b32_e32 v17, v5, v7 6446; GFX9-NEXT: v_lshrrev_b64 v[6:7], v10, v[14:15] 6447; GFX9-NEXT: v_lshrrev_b64 v[4:5], v16, v[14:15] 6448; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v16 6449; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v11, vcc 6450; GFX9-NEXT: v_cndmask_b32_e32 v7, v7, v17, vcc 6451; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v16 6452; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v12, s[4:5] 6453; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v13, s[4:5] 6454; GFX9-NEXT: v_cndmask_b32_e32 v10, 0, v4, vcc 6455; GFX9-NEXT: v_cndmask_b32_e32 v11, 0, v5, vcc 6456; GFX9-NEXT: v_or_b32_e32 v4, v18, v6 6457; GFX9-NEXT: v_or_b32_e32 v5, v19, v7 6458; GFX9-NEXT: v_or_b32_e32 v6, v8, v10 6459; GFX9-NEXT: v_or_b32_e32 v7, v9, v11 6460; GFX9-NEXT: s_setpc_b64 s[30:31] 6461; 6462; GFX10-LABEL: v_fshr_v2i128: 6463; GFX10: ; %bb.0: 6464; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6465; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 6466; GFX10-NEXT: v_xor_b32_e32 v17, -1, v16 6467; GFX10-NEXT: s_movk_i32 s5, 0x7f 6468; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 6469; GFX10-NEXT: v_and_b32_e32 v26, s5, v16 6470; GFX10-NEXT: v_lshlrev_b64 v[6:7], 1, v[6:7] 6471; GFX10-NEXT: v_and_b32_e32 v25, s5, v17 6472; GFX10-NEXT: v_lshrrev_b32_e32 v17, 31, v1 6473; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 6474; GFX10-NEXT: v_subrev_nc_u32_e32 v27, 64, v26 6475; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v26 6476; GFX10-NEXT: v_sub_nc_u32_e32 v18, 64, v25 6477; GFX10-NEXT: v_or_b32_e32 v2, v2, v17 6478; GFX10-NEXT: v_subrev_nc_u32_e32 v19, 64, v25 6479; GFX10-NEXT: v_lshlrev_b64 v[23:24], v25, v[0:1] 6480; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v25 6481; GFX10-NEXT: v_lshrrev_b64 v[17:18], v18, v[0:1] 6482; GFX10-NEXT: v_lshlrev_b64 v[21:22], v25, v[2:3] 6483; GFX10-NEXT: 
v_lshlrev_b64 v[0:1], v19, v[0:1] 6484; GFX10-NEXT: v_cndmask_b32_e32 v23, 0, v23, vcc_lo 6485; GFX10-NEXT: v_cndmask_b32_e32 v24, 0, v24, vcc_lo 6486; GFX10-NEXT: v_or_b32_e32 v22, v18, v22 6487; GFX10-NEXT: v_sub_nc_u32_e32 v18, 64, v26 6488; GFX10-NEXT: v_or_b32_e32 v21, v17, v21 6489; GFX10-NEXT: v_lshrrev_b64 v[16:17], v26, v[8:9] 6490; GFX10-NEXT: v_cndmask_b32_e32 v22, v1, v22, vcc_lo 6491; GFX10-NEXT: v_lshlrev_b64 v[18:19], v18, v[10:11] 6492; GFX10-NEXT: v_cndmask_b32_e32 v21, v0, v21, vcc_lo 6493; GFX10-NEXT: v_lshrrev_b64 v[0:1], v27, v[10:11] 6494; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v25 6495; GFX10-NEXT: v_or_b32_e32 v16, v16, v18 6496; GFX10-NEXT: v_or_b32_e32 v17, v17, v19 6497; GFX10-NEXT: v_cndmask_b32_e32 v18, v21, v2, vcc_lo 6498; GFX10-NEXT: v_cndmask_b32_e32 v22, v22, v3, vcc_lo 6499; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v26 6500; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v16, s4 6501; GFX10-NEXT: v_xor_b32_e32 v16, -1, v20 6502; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v17, s4 6503; GFX10-NEXT: v_lshrrev_b64 v[2:3], v26, v[10:11] 6504; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo 6505; GFX10-NEXT: v_and_b32_e32 v25, s5, v16 6506; GFX10-NEXT: v_lshrrev_b32_e32 v8, 31, v5 6507; GFX10-NEXT: v_lshlrev_b64 v[4:5], 1, v[4:5] 6508; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo 6509; GFX10-NEXT: v_or_b32_e32 v0, v23, v0 6510; GFX10-NEXT: v_sub_nc_u32_e32 v9, 64, v25 6511; GFX10-NEXT: v_or_b32_e32 v6, v6, v8 6512; GFX10-NEXT: v_and_b32_e32 v23, s5, v20 6513; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v2, s4 6514; GFX10-NEXT: v_cndmask_b32_e64 v26, 0, v3, s4 6515; GFX10-NEXT: v_lshrrev_b64 v[8:9], v9, v[4:5] 6516; GFX10-NEXT: v_lshlrev_b64 v[10:11], v25, v[6:7] 6517; GFX10-NEXT: v_sub_nc_u32_e32 v20, 64, v23 6518; GFX10-NEXT: v_subrev_nc_u32_e32 v3, 64, v25 6519; GFX10-NEXT: v_or_b32_e32 v2, v18, v2 6520; GFX10-NEXT: v_lshlrev_b64 v[16:17], v25, v[4:5] 6521; GFX10-NEXT: v_lshrrev_b64 v[18:19], v23, v[12:13] 6522; GFX10-NEXT: v_or_b32_e32 v10, v8, v10 
; GFX10-NEXT: v_subrev_nc_u32_e32 v8, 64, v23
; GFX10-NEXT: v_lshlrev_b64 v[20:21], v20, v[14:15]
; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v25
; GFX10-NEXT: v_lshlrev_b64 v[3:4], v3, v[4:5]
; GFX10-NEXT: v_or_b32_e32 v5, v9, v11
; GFX10-NEXT: v_lshrrev_b64 v[8:9], v8, v[14:15]
; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v23
; GFX10-NEXT: v_cndmask_b32_e32 v11, 0, v16, vcc_lo
; GFX10-NEXT: v_or_b32_e32 v16, v18, v20
; GFX10-NEXT: v_or_b32_e32 v18, v19, v21
; GFX10-NEXT: v_cndmask_b32_e32 v10, v3, v10, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v5, v4, v5, vcc_lo
; GFX10-NEXT: v_lshrrev_b64 v[3:4], v23, v[14:15]
; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v16, s4
; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 0, v23
; GFX10-NEXT: v_cmp_eq_u32_e64 s6, 0, v25
; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v18, s4
; GFX10-NEXT: v_cndmask_b32_e32 v14, 0, v17, vcc_lo
; GFX10-NEXT: v_or_b32_e32 v1, v24, v1
; GFX10-NEXT: v_cndmask_b32_e64 v6, v10, v6, s6
; GFX10-NEXT: v_cndmask_b32_e64 v7, v5, v7, s6
; GFX10-NEXT: v_cndmask_b32_e64 v5, v8, v12, s5
; GFX10-NEXT: v_cndmask_b32_e64 v8, v9, v13, s5
; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, v3, s4
; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, v4, s4
; GFX10-NEXT: v_or_b32_e32 v3, v22, v26
; GFX10-NEXT: v_or_b32_e32 v4, v11, v5
; GFX10-NEXT: v_or_b32_e32 v5, v14, v8
; GFX10-NEXT: v_or_b32_e32 v6, v6, v9
; GFX10-NEXT: v_or_b32_e32 v7, v7, v10
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %result = call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt)
  ret <2 x i128> %result
}

; Declarations of the llvm.fshr intrinsic overloads referenced by this test.
; Every overload takes three operands of one type and returns that same type;
; the type suffix in the intrinsic name mirrors the operand type. All of them
; share attribute group #0, defined at the end of the file.

; Scalar and vector overloads with i7 / i8 elements.
declare i7 @llvm.fshr.i7(i7, i7, i7) #0
declare i8 @llvm.fshr.i8(i8, i8, i8) #0
declare <2 x i8> @llvm.fshr.v2i8(<2 x i8>, <2 x i8>, <2 x i8>) #0
declare <4 x i8> @llvm.fshr.v4i8(<4 x i8>, <4 x i8>, <4 x i8>) #0

; i16 elements: scalar plus 2-, 3-, 4-, 5-, 6- and 8-element vectors.
declare i16 @llvm.fshr.i16(i16, i16, i16) #0
declare <2 x i16> @llvm.fshr.v2i16(<2 x i16>, <2 x i16>, <2 x i16>) #0
declare <3 x i16> @llvm.fshr.v3i16(<3 x i16>, <3 x i16>, <3 x i16>) #0
declare <4 x i16> @llvm.fshr.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) #0
declare <5 x i16> @llvm.fshr.v5i16(<5 x i16>, <5 x i16>, <5 x i16>) #0
declare <6 x i16> @llvm.fshr.v6i16(<6 x i16>, <6 x i16>, <6 x i16>) #0
declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) #0

; i24 elements (a non-power-of-two width).
declare i24 @llvm.fshr.i24(i24, i24, i24) #0
declare <2 x i24> @llvm.fshr.v2i24(<2 x i24>, <2 x i24>, <2 x i24>) #0

; i32 elements: scalar plus 2-, 3-, 4-, 5- and 16-element vectors.
declare i32 @llvm.fshr.i32(i32, i32, i32) #0
declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) #0
declare <3 x i32> @llvm.fshr.v3i32(<3 x i32>, <3 x i32>, <3 x i32>) #0
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #0
declare <5 x i32> @llvm.fshr.v5i32(<5 x i32>, <5 x i32>, <5 x i32>) #0
declare <16 x i32> @llvm.fshr.v16i32(<16 x i32>, <16 x i32>, <16 x i32>) #0

; i48 scalar (a non-power-of-two width).
declare i48 @llvm.fshr.i48(i48, i48, i48) #0

; i64 elements.
declare i64 @llvm.fshr.i64(i64, i64, i64) #0
declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) #0

; i128 elements; the v2i128 overload is the one called by the function body
; immediately above these declarations.
declare i128 @llvm.fshr.i128(i128, i128, i128) #0
declare <2 x i128> @llvm.fshr.v2i128(<2 x i128>, <2 x i128>, <2 x i128>) #0

; Attribute group applied to every intrinsic declaration above.
attributes #0 = { nounwind readnone speculatable willreturn }