; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck %s

; Every function below masks its shift amount with (bit width - 1) before
; shifting. The assertions show the shifts consuming the amount register
; directly, with no separate AND instruction in the expected output.

; i16: sum of shl, lshr and ashr of %a, each by (%b & 15).
define i16 @csh_16(i16 %a, i16 %b) {
; CHECK-LABEL: csh_16:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_lshlrev_b16_e32 v2, v1, v0
; CHECK-NEXT:    v_lshrrev_b16_e32 v3, v1, v0
; CHECK-NEXT:    v_ashrrev_i16_e32 v0, v1, v0
; CHECK-NEXT:    v_add_u16_e32 v1, v2, v3
; CHECK-NEXT:    v_add_u16_e32 v0, v1, v0
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %amt = and i16 %b, 15
  %left = shl i16 %a, %amt
  %right.u = lshr i16 %a, %amt
  %right.s = ashr i16 %a, %amt
  %sum.0 = add i16 %left, %right.u
  %sum = add i16 %sum.0, %right.s
  ret i16 %sum
}

; i32: same pattern with mask 31; the two adds are expected as one v_add3_u32.
define i32 @csh_32(i32 %a, i32 %b) {
; CHECK-LABEL: csh_32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_lshlrev_b32_e32 v2, v1, v0
; CHECK-NEXT:    v_lshrrev_b32_e32 v3, v1, v0
; CHECK-NEXT:    v_ashrrev_i32_e32 v0, v1, v0
; CHECK-NEXT:    v_add3_u32 v0, v2, v3, v0
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %amt = and i32 %b, 31
  %left = shl i32 %a, %amt
  %right.u = lshr i32 %a, %amt
  %right.s = ashr i32 %a, %amt
  %sum.0 = add i32 %left, %right.u
  %sum = add i32 %sum.0, %right.s
  ret i32 %sum
}

; i32 with inreg arguments in an amdgpu_ps function: the expected output uses
; the s_* (scalar) shift and add instructions.
define amdgpu_ps i32 @s_csh_32(i32 inreg %a, i32 inreg %b) {
; CHECK-LABEL: s_csh_32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_lshl_b32 s2, s0, s1
; CHECK-NEXT:    s_lshr_b32 s3, s0, s1
; CHECK-NEXT:    s_ashr_i32 s0, s0, s1
; CHECK-NEXT:    s_add_i32 s1, s2, s3
; CHECK-NEXT:    s_add_i32 s0, s1, s0
; CHECK-NEXT:    ; return to shader part epilog
  %amt = and i32 %b, 31
  %left = shl i32 %a, %amt
  %right.u = lshr i32 %a, %amt
  %right.s = ashr i32 %a, %amt
  %sum.0 = add i32 %left, %right.u
  %sum = add i32 %sum.0, %right.s
  ret i32 %sum
}

; <4 x i32>: each lane is masked with 31; the expected output is four
; independent 32-bit shift/add sequences, one per element.
define <4 x i32> @csh_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: csh_v4i32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_lshlrev_b32_e32 v8, v7, v3
; CHECK-NEXT:    v_lshlrev_b32_e32 v9, v6, v2
; CHECK-NEXT:    v_lshlrev_b32_e32 v10, v5, v1
; CHECK-NEXT:    v_lshlrev_b32_e32 v11, v4, v0
; CHECK-NEXT:    v_lshrrev_b32_e32 v12, v7, v3
; CHECK-NEXT:    v_lshrrev_b32_e32 v13, v6, v2
; CHECK-NEXT:    v_lshrrev_b32_e32 v14, v5, v1
; CHECK-NEXT:    v_lshrrev_b32_e32 v15, v4, v0
; CHECK-NEXT:    v_ashrrev_i32_e32 v3, v7, v3
; CHECK-NEXT:    v_ashrrev_i32_e32 v2, v6, v2
; CHECK-NEXT:    v_ashrrev_i32_e32 v1, v5, v1
; CHECK-NEXT:    v_ashrrev_i32_e32 v0, v4, v0
; CHECK-NEXT:    v_add3_u32 v0, v11, v15, v0
; CHECK-NEXT:    v_add3_u32 v1, v10, v14, v1
; CHECK-NEXT:    v_add3_u32 v2, v9, v13, v2
; CHECK-NEXT:    v_add3_u32 v3, v8, v12, v3
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %amt = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
  %left = shl <4 x i32> %a, %amt
  %right.u = lshr <4 x i32> %a, %amt
  %right.s = ashr <4 x i32> %a, %amt
  %sum.0 = add <4 x i32> %left, %right.u
  %sum = add <4 x i32> %sum.0, %right.s
  ret <4 x i32> %sum
}

; <4 x i32> with inreg arguments in an amdgpu_ps function: the expected output
; is the per-element s_* (scalar) form of the same sequence.
define amdgpu_ps <4 x i32> @s_csh_v4i32(<4 x i32> inreg %a, <4 x i32> inreg %b) {
; CHECK-LABEL: s_csh_v4i32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_lshl_b32 s8, s0, s4
; CHECK-NEXT:    s_lshl_b32 s9, s1, s5
; CHECK-NEXT:    s_lshl_b32 s10, s2, s6
; CHECK-NEXT:    s_lshl_b32 s11, s3, s7
; CHECK-NEXT:    s_lshr_b32 s12, s0, s4
; CHECK-NEXT:    s_lshr_b32 s13, s1, s5
; CHECK-NEXT:    s_lshr_b32 s14, s2, s6
; CHECK-NEXT:    s_lshr_b32 s15, s3, s7
; CHECK-NEXT:    s_ashr_i32 s3, s3, s7
; CHECK-NEXT:    s_ashr_i32 s2, s2, s6
; CHECK-NEXT:    s_ashr_i32 s1, s1, s5
; CHECK-NEXT:    s_ashr_i32 s0, s0, s4
; CHECK-NEXT:    s_add_i32 s4, s11, s15
; CHECK-NEXT:    s_add_i32 s5, s10, s14
; CHECK-NEXT:    s_add_i32 s6, s9, s13
; CHECK-NEXT:    s_add_i32 s7, s8, s12
; CHECK-NEXT:    s_add_i32 s0, s7, s0
; CHECK-NEXT:    s_add_i32 s1, s6, s1
; CHECK-NEXT:    s_add_i32 s2, s5, s2
; CHECK-NEXT:    s_add_i32 s3, s4, s3
; CHECK-NEXT:    ; return to shader part epilog
  %amt = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
  %left = shl <4 x i32> %a, %amt
  %right.u = lshr <4 x i32> %a, %amt
  %right.s = ashr <4 x i32> %a, %amt
  %sum.0 = add <4 x i32> %left, %right.u
  %sum = add <4 x i32> %sum.0, %right.s
  ret <4 x i32> %sum
}

; i64: mask is 63; the expected output uses 64-bit shifts and an add/add-with-
; carry pair for each of the two 64-bit additions.
define i64 @csh_64(i64 %a, i64 %b) {
; CHECK-LABEL: csh_64:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_lshlrev_b64 v[3:4], v2, v[0:1]
; CHECK-NEXT:    v_lshrrev_b64 v[5:6], v2, v[0:1]
; CHECK-NEXT:    v_ashrrev_i64 v[0:1], v2, v[0:1]
; CHECK-NEXT:    v_add_co_u32_e32 v2, vcc, v3, v5
; CHECK-NEXT:    v_addc_co_u32_e32 v3, vcc, v4, v6, vcc
; CHECK-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
; CHECK-NEXT:    v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %amt = and i64 %b, 63
  %left = shl i64 %a, %amt
  %right.u = lshr i64 %a, %amt
  %right.s = ashr i64 %a, %amt
  %sum.0 = add i64 %left, %right.u
  %sum = add i64 %sum.0, %right.s
  ret i64 %sum
}

; i64 with inreg arguments in an amdgpu_ps function: the expected output uses
; s_* 64-bit shifts and s_add_u32/s_addc_u32 pairs.
define amdgpu_ps i64 @s_csh_64(i64 inreg %a, i64 inreg %b) {
; CHECK-LABEL: s_csh_64:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_lshl_b64 s[4:5], s[0:1], s2
; CHECK-NEXT:    s_lshr_b64 s[6:7], s[0:1], s2
; CHECK-NEXT:    s_ashr_i64 s[0:1], s[0:1], s2
; CHECK-NEXT:    s_add_u32 s2, s4, s6
; CHECK-NEXT:    s_addc_u32 s3, s5, s7
; CHECK-NEXT:    s_add_u32 s0, s2, s0
; CHECK-NEXT:    s_addc_u32 s1, s3, s1
; CHECK-NEXT:    ; return to shader part epilog
  %amt = and i64 %b, 63
  %left = shl i64 %a, %amt
  %right.u = lshr i64 %a, %amt
  %right.s = ashr i64 %a, %amt
  %sum.0 = add i64 %left, %right.u
  %sum = add i64 %sum.0, %right.s
  ret i64 %sum
}

; (%a << (%b & 31)) | %a: the expected output is a single v_lshl_or_b32.
define i32 @cshl_or(i32 %a, i32 %b) {
; CHECK-LABEL: cshl_or:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_lshl_or_b32 v0, v0, v1, v0
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %amt = and i32 %b, 31
  %left = shl i32 %a, %amt
  %res = or i32 %left, %a
  ret i32 %res
}

; (%a << (%b & 31)) + %c: the expected output is a single v_lshl_add_u32, with
; no separate AND of the shift amount.
define i32 @cshl_add(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: cshl_add:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_lshl_add_u32 v0, v0, v1, v2
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %amt = and i32 %b, 31
  %left = shl i32 %a, %amt
  %res = add i32 %left, %c
  ret i32 %res
}

; (%a + %b) << (%b & 31): the expected output is a single v_add_lshl_u32, with
; %b (v1) used as both the addend and the shift amount.
define i32 @add_cshl(i32 %a, i32 %b) {
; CHECK-LABEL: add_cshl:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_add_lshl_u32 v0, v0, v1, v1
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %sum = add i32 %a, %b
  %amt = and i32 %b, 31
  %res = shl i32 %sum, %amt
  ret i32 %res
}