; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -global-isel < %s | FileCheck -check-prefix=GISEL %s

; Each test shifts %a left/right-logical/right-arithmetic by a pre-masked
; amount (%b & (bitwidth-1) or wider) and sums the results. The mask is
; redundant when it covers at least the bits the hardware shift reads, so
; SelectionDAG folds it away; the GISEL prefixes document where GlobalISel
; still emits the explicit and.

; 16-bit VALU case: amount masked with 15. SDAG drops the mask; GlobalISel
; keeps the v_and (presumably the 16-bit shifts only read the low bits of
; the amount — confirm against the ISA docs if this changes).
define i16 @csh_16(i16 %a, i16 %b) {
; CHECK-LABEL: csh_16:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_lshlrev_b16_e32 v2, v1, v0
; CHECK-NEXT:    v_lshrrev_b16_e32 v3, v1, v0
; CHECK-NEXT:    v_ashrrev_i16_e32 v0, v1, v0
; CHECK-NEXT:    v_add_u16_e32 v1, v2, v3
; CHECK-NEXT:    v_add_u16_e32 v0, v1, v0
; CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: csh_16:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_and_b32_e32 v1, 15, v1
; GISEL-NEXT:    v_lshlrev_b16_e32 v2, v1, v0
; GISEL-NEXT:    v_lshrrev_b16_e32 v3, v1, v0
; GISEL-NEXT:    v_ashrrev_i16_e32 v0, v1, v0
; GISEL-NEXT:    v_add_u16_e32 v1, v2, v3
; GISEL-NEXT:    v_add_u16_e32 v0, v1, v0
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %and = and i16 %b, 15
  %shl = shl i16 %a, %and
  %lshr = lshr i16 %a, %and
  %ashr = ashr i16 %a, %and
  %ret.0 = add i16 %shl, %lshr
  %ret = add i16 %ret.0, %ashr
  ret i16 %ret
}

; 32-bit VALU case: amount masked with 31. SDAG drops the mask and uses
; v_add3 for the final sum; GlobalISel keeps the v_and.
define i32 @csh_32(i32 %a, i32 %b) {
; CHECK-LABEL: csh_32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_lshlrev_b32_e32 v2, v1, v0
; CHECK-NEXT:    v_lshrrev_b32_e32 v3, v1, v0
; CHECK-NEXT:    v_ashrrev_i32_e32 v0, v1, v0
; CHECK-NEXT:    v_add3_u32 v0, v2, v3, v0
; CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: csh_32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_and_b32_e32 v1, 31, v1
; GISEL-NEXT:    v_lshlrev_b32_e32 v2, v1, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v3, v1, v0
; GISEL-NEXT:    v_ashrrev_i32_e32 v0, v1, v0
; GISEL-NEXT:    v_add3_u32 v0, v2, v3, v0
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %and = and i32 %b, 31
  %shl = shl i32 %a, %and
  %lshr = lshr i32 %a, %and
  %ashr = ashr i32 %a, %and
  %ret.0 = add i32 %shl, %lshr
  %ret = add i32 %ret.0, %ashr
  ret i32 %ret
}

; 32-bit SALU case: both selectors drop the redundant and-31 on the scalar
; path.
define amdgpu_ps i32 @s_csh_32_0(i32 inreg %a, i32 inreg %b) {
; CHECK-LABEL: s_csh_32_0:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_lshl_b32 s2, s0, s1
; CHECK-NEXT:    s_lshr_b32 s3, s0, s1
; CHECK-NEXT:    s_ashr_i32 s0, s0, s1
; CHECK-NEXT:    s_add_i32 s1, s2, s3
; CHECK-NEXT:    s_add_i32 s0, s1, s0
; CHECK-NEXT:    ; return to shader part epilog
;
; GISEL-LABEL: s_csh_32_0:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_lshl_b32 s2, s0, s1
; GISEL-NEXT:    s_lshr_b32 s3, s0, s1
; GISEL-NEXT:    s_ashr_i32 s0, s0, s1
; GISEL-NEXT:    s_add_i32 s1, s2, s3
; GISEL-NEXT:    s_add_i32 s0, s1, s0
; GISEL-NEXT:    ; return to shader part epilog
  %and = and i32 %b, 31
  %shl = shl i32 %a, %and
  %lshr = lshr i32 %a, %and
  %ashr = ashr i32 %a, %and
  %ret.0 = add i32 %shl, %lshr
  %ret = add i32 %ret.0, %ashr
  ret i32 %ret
}

; 32-bit SALU case with a wider mask (127): still redundant, since it does
; not change the low 5 bits; both selectors drop it.
define amdgpu_ps i32 @s_csh_32_1(i32 inreg %a, i32 inreg %b) {
; CHECK-LABEL: s_csh_32_1:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_lshl_b32 s2, s0, s1
; CHECK-NEXT:    s_lshr_b32 s3, s0, s1
; CHECK-NEXT:    s_ashr_i32 s0, s0, s1
; CHECK-NEXT:    s_add_i32 s1, s2, s3
; CHECK-NEXT:    s_add_i32 s0, s1, s0
; CHECK-NEXT:    ; return to shader part epilog
;
; GISEL-LABEL: s_csh_32_1:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_lshl_b32 s2, s0, s1
; GISEL-NEXT:    s_lshr_b32 s3, s0, s1
; GISEL-NEXT:    s_ashr_i32 s0, s0, s1
; GISEL-NEXT:    s_add_i32 s1, s2, s3
; GISEL-NEXT:    s_add_i32 s0, s1, s0
; GISEL-NEXT:    ; return to shader part epilog
  %and = and i32 %b, 127
  %shl = shl i32 %a, %and
  %lshr = lshr i32 %a, %and
  %ashr = ashr i32 %a, %and
  %ret.0 = add i32 %shl, %lshr
  %ret = add i32 %ret.0, %ashr
  ret i32 %ret
}

; Vector (4 x i32) VALU case: SDAG drops the per-lane mask; GlobalISel emits
; four scalar v_and instructions after scalarizing.
define <4 x i32> @csh_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: csh_v4i32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_lshlrev_b32_e32 v8, v7, v3
; CHECK-NEXT:    v_lshlrev_b32_e32 v9, v6, v2
; CHECK-NEXT:    v_lshlrev_b32_e32 v10, v5, v1
; CHECK-NEXT:    v_lshlrev_b32_e32 v11, v4, v0
; CHECK-NEXT:    v_lshrrev_b32_e32 v12, v7, v3
; CHECK-NEXT:    v_lshrrev_b32_e32 v13, v6, v2
; CHECK-NEXT:    v_lshrrev_b32_e32 v14, v5, v1
; CHECK-NEXT:    v_lshrrev_b32_e32 v15, v4, v0
; CHECK-NEXT:    v_ashrrev_i32_e32 v3, v7, v3
; CHECK-NEXT:    v_ashrrev_i32_e32 v2, v6, v2
; CHECK-NEXT:    v_ashrrev_i32_e32 v1, v5, v1
; CHECK-NEXT:    v_ashrrev_i32_e32 v0, v4, v0
; CHECK-NEXT:    v_add3_u32 v0, v11, v15, v0
; CHECK-NEXT:    v_add3_u32 v1, v10, v14, v1
; CHECK-NEXT:    v_add3_u32 v2, v9, v13, v2
; CHECK-NEXT:    v_add3_u32 v3, v8, v12, v3
; CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: csh_v4i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_and_b32_e32 v4, 31, v4
; GISEL-NEXT:    v_and_b32_e32 v5, 31, v5
; GISEL-NEXT:    v_and_b32_e32 v6, 31, v6
; GISEL-NEXT:    v_and_b32_e32 v7, 31, v7
; GISEL-NEXT:    v_lshlrev_b32_e32 v8, v4, v0
; GISEL-NEXT:    v_lshlrev_b32_e32 v9, v5, v1
; GISEL-NEXT:    v_lshlrev_b32_e32 v10, v6, v2
; GISEL-NEXT:    v_lshlrev_b32_e32 v11, v7, v3
; GISEL-NEXT:    v_lshrrev_b32_e32 v12, v4, v0
; GISEL-NEXT:    v_lshrrev_b32_e32 v13, v5, v1
; GISEL-NEXT:    v_lshrrev_b32_e32 v14, v6, v2
; GISEL-NEXT:    v_lshrrev_b32_e32 v15, v7, v3
; GISEL-NEXT:    v_ashrrev_i32_e32 v0, v4, v0
; GISEL-NEXT:    v_ashrrev_i32_e32 v1, v5, v1
; GISEL-NEXT:    v_ashrrev_i32_e32 v2, v6, v2
; GISEL-NEXT:    v_ashrrev_i32_e32 v3, v7, v3
; GISEL-NEXT:    v_add3_u32 v0, v8, v12, v0
; GISEL-NEXT:    v_add3_u32 v1, v9, v13, v1
; GISEL-NEXT:    v_add3_u32 v2, v10, v14, v2
; GISEL-NEXT:    v_add3_u32 v3, v11, v15, v3
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %and = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
  %shl = shl <4 x i32> %a, %and
  %lshr = lshr <4 x i32> %a, %and
  %ashr = ashr <4 x i32> %a, %and
  %ret.0 = add <4 x i32> %shl, %lshr
  %ret = add <4 x i32> %ret.0, %ashr
  ret <4 x i32> %ret
}

; Vector (4 x i32) SALU case: SDAG drops the mask entirely; GlobalISel
; materializes the 31 splat into an SGPR pair and masks with two s_and_b64.
define amdgpu_ps <4 x i32> @s_csh_v4i32(<4 x i32> inreg %a, <4 x i32> inreg %b) {
; CHECK-LABEL: s_csh_v4i32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_lshl_b32 s8, s0, s4
; CHECK-NEXT:    s_lshl_b32 s9, s1, s5
; CHECK-NEXT:    s_lshl_b32 s10, s2, s6
; CHECK-NEXT:    s_lshl_b32 s11, s3, s7
; CHECK-NEXT:    s_lshr_b32 s12, s0, s4
; CHECK-NEXT:    s_lshr_b32 s13, s1, s5
; CHECK-NEXT:    s_lshr_b32 s14, s2, s6
; CHECK-NEXT:    s_lshr_b32 s15, s3, s7
; CHECK-NEXT:    s_ashr_i32 s3, s3, s7
; CHECK-NEXT:    s_ashr_i32 s2, s2, s6
; CHECK-NEXT:    s_ashr_i32 s1, s1, s5
; CHECK-NEXT:    s_ashr_i32 s0, s0, s4
; CHECK-NEXT:    s_add_i32 s4, s11, s15
; CHECK-NEXT:    s_add_i32 s5, s10, s14
; CHECK-NEXT:    s_add_i32 s6, s9, s13
; CHECK-NEXT:    s_add_i32 s7, s8, s12
; CHECK-NEXT:    s_add_i32 s0, s7, s0
; CHECK-NEXT:    s_add_i32 s1, s6, s1
; CHECK-NEXT:    s_add_i32 s2, s5, s2
; CHECK-NEXT:    s_add_i32 s3, s4, s3
; CHECK-NEXT:    ; return to shader part epilog
;
; GISEL-LABEL: s_csh_v4i32:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_mov_b32 s8, 31
; GISEL-NEXT:    s_mov_b32 s9, s8
; GISEL-NEXT:    s_and_b64 s[4:5], s[4:5], s[8:9]
; GISEL-NEXT:    s_and_b64 s[6:7], s[6:7], s[8:9]
; GISEL-NEXT:    s_lshl_b32 s8, s0, s4
; GISEL-NEXT:    s_lshl_b32 s9, s1, s5
; GISEL-NEXT:    s_lshl_b32 s10, s2, s6
; GISEL-NEXT:    s_lshl_b32 s11, s3, s7
; GISEL-NEXT:    s_lshr_b32 s12, s0, s4
; GISEL-NEXT:    s_lshr_b32 s13, s1, s5
; GISEL-NEXT:    s_lshr_b32 s14, s2, s6
; GISEL-NEXT:    s_lshr_b32 s15, s3, s7
; GISEL-NEXT:    s_ashr_i32 s0, s0, s4
; GISEL-NEXT:    s_ashr_i32 s1, s1, s5
; GISEL-NEXT:    s_ashr_i32 s2, s2, s6
; GISEL-NEXT:    s_ashr_i32 s3, s3, s7
; GISEL-NEXT:    s_add_i32 s4, s8, s12
; GISEL-NEXT:    s_add_i32 s5, s9, s13
; GISEL-NEXT:    s_add_i32 s6, s10, s14
; GISEL-NEXT:    s_add_i32 s7, s11, s15
; GISEL-NEXT:    s_add_i32 s0, s4, s0
; GISEL-NEXT:    s_add_i32 s1, s5, s1
; GISEL-NEXT:    s_add_i32 s2, s6, s2
; GISEL-NEXT:    s_add_i32 s3, s7, s3
; GISEL-NEXT:    ; return to shader part epilog
  %and = and <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
  %shl = shl <4 x i32> %a, %and
  %lshr = lshr <4 x i32> %a, %and
  %ashr = ashr <4 x i32> %a, %and
  %ret.0 = add <4 x i32> %shl, %lshr
  %ret = add <4 x i32> %ret.0, %ashr
  ret <4 x i32> %ret
}

; 64-bit VALU case: amount masked with 63. SDAG drops the mask; GlobalISel
; keeps a single v_and on the low half of the shift amount.
define i64 @csh_64(i64 %a, i64 %b) {
; CHECK-LABEL: csh_64:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_lshlrev_b64 v[3:4], v2, v[0:1]
; CHECK-NEXT:    v_lshrrev_b64 v[5:6], v2, v[0:1]
; CHECK-NEXT:    v_ashrrev_i64 v[0:1], v2, v[0:1]
; CHECK-NEXT:    v_add_co_u32_e32 v2, vcc, v3, v5
; CHECK-NEXT:    v_addc_co_u32_e32 v3, vcc, v4, v6, vcc
; CHECK-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
; CHECK-NEXT:    v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
; CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: csh_64:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_and_b32_e32 v6, 63, v2
; GISEL-NEXT:    v_lshlrev_b64 v[2:3], v6, v[0:1]
; GISEL-NEXT:    v_lshrrev_b64 v[4:5], v6, v[0:1]
; GISEL-NEXT:    v_ashrrev_i64 v[0:1], v6, v[0:1]
; GISEL-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v4
; GISEL-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
; GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v2, v0
; GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %and = and i64 %b, 63
  %shl = shl i64 %a, %and
  %lshr = lshr i64 %a, %and
  %ashr = ashr i64 %a, %and
  %ret.0 = add i64 %shl, %lshr
  %ret = add i64 %ret.0, %ashr
  ret i64 %ret
}

; 64-bit SALU case: SDAG drops the and-63; GlobalISel keeps an s_and_b64
; with the inline constant 63.
define amdgpu_ps i64 @s_csh_64_0(i64 inreg %a, i64 inreg %b) {
; CHECK-LABEL: s_csh_64_0:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_lshl_b64 s[4:5], s[0:1], s2
; CHECK-NEXT:    s_lshr_b64 s[6:7], s[0:1], s2
; CHECK-NEXT:    s_ashr_i64 s[0:1], s[0:1], s2
; CHECK-NEXT:    s_add_u32 s2, s4, s6
; CHECK-NEXT:    s_addc_u32 s3, s5, s7
; CHECK-NEXT:    s_add_u32 s0, s2, s0
; CHECK-NEXT:    s_addc_u32 s1, s3, s1
; CHECK-NEXT:    ; return to shader part epilog
;
; GISEL-LABEL: s_csh_64_0:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_and_b64 s[2:3], s[2:3], 63
; GISEL-NEXT:    s_lshl_b64 s[4:5], s[0:1], s2
; GISEL-NEXT:    s_lshr_b64 s[6:7], s[0:1], s2
; GISEL-NEXT:    s_ashr_i64 s[0:1], s[0:1], s2
; GISEL-NEXT:    s_add_u32 s2, s4, s6
; GISEL-NEXT:    s_addc_u32 s3, s5, s7
; GISEL-NEXT:    s_add_u32 s0, s2, s0
; GISEL-NEXT:    s_addc_u32 s1, s3, s1
; GISEL-NEXT:    ; return to shader part epilog
  %and = and i64 %b, 63
  %shl = shl i64 %a, %and
  %lshr = lshr i64 %a, %and
  %ashr = ashr i64 %a, %and
  %ret.0 = add i64 %shl, %lshr
  %ret = add i64 %ret.0, %ashr
  ret i64 %ret
}

; 64-bit SALU case with a wider mask (255): still redundant (low 6 bits are
; unchanged). SDAG drops it; GlobalISel materializes 0xff (not an inline
; 64-bit constant here) and masks.
define amdgpu_ps i64 @s_csh_64_1(i64 inreg %a, i64 inreg %b) {
; CHECK-LABEL: s_csh_64_1:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_lshl_b64 s[4:5], s[0:1], s2
; CHECK-NEXT:    s_lshr_b64 s[6:7], s[0:1], s2
; CHECK-NEXT:    s_ashr_i64 s[0:1], s[0:1], s2
; CHECK-NEXT:    s_add_u32 s2, s4, s6
; CHECK-NEXT:    s_addc_u32 s3, s5, s7
; CHECK-NEXT:    s_add_u32 s0, s2, s0
; CHECK-NEXT:    s_addc_u32 s1, s3, s1
; CHECK-NEXT:    ; return to shader part epilog
;
; GISEL-LABEL: s_csh_64_1:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_mov_b64 s[4:5], 0xff
; GISEL-NEXT:    s_and_b64 s[2:3], s[2:3], s[4:5]
; GISEL-NEXT:    s_lshl_b64 s[4:5], s[0:1], s2
; GISEL-NEXT:    s_lshr_b64 s[6:7], s[0:1], s2
; GISEL-NEXT:    s_ashr_i64 s[0:1], s[0:1], s2
; GISEL-NEXT:    s_add_u32 s2, s4, s6
; GISEL-NEXT:    s_addc_u32 s3, s5, s7
; GISEL-NEXT:    s_add_u32 s0, s2, s0
; GISEL-NEXT:    s_addc_u32 s1, s3, s1
; GISEL-NEXT:    ; return to shader part epilog
  %and = and i64 %b, 255
  %shl = shl i64 %a, %and
  %lshr = lshr i64 %a, %and
  %ashr = ashr i64 %a, %and
  %ret.0 = add i64 %shl, %lshr
  %ret = add i64 %ret.0, %ashr
  ret i64 %ret
}

; Masked shl feeding an or: should select v_lshl_or_b32. SDAG also drops
; the mask; GlobalISel keeps the v_and but still forms the fused op.
define i32 @cshl_or(i32 %a, i32 %b) {
; CHECK-LABEL: cshl_or:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_lshl_or_b32 v0, v0, v1, v0
; CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: cshl_or:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_and_b32_e32 v1, 31, v1
; GISEL-NEXT:    v_lshl_or_b32 v0, v0, v1, v0
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %and = and i32 %b, 31
  %shl = shl i32 %a, %and
  %or = or i32 %shl, %a
  ret i32 %or
}

; Masked shl feeding an add: should select v_lshl_add_u32.
define i32 @cshl_add(i32 %a, i32 %b, i32 %c) {
; CHECK-LABEL: cshl_add:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_lshl_add_u32 v0, v0, v1, v2
; CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: cshl_add:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_and_b32_e32 v1, 31, v1
; GISEL-NEXT:    v_lshl_add_u32 v0, v0, v1, v2
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %and = and i32 %b, 31
  %shl = shl i32 %a, %and
  %add = add i32 %shl, %c
  ret i32 %add
}

; Add feeding a masked shl: should select v_add_lshl_u32. Note the GISEL
; run masks into a fresh register (v2) because the unmasked %b is still
; live as an operand of the add.
define i32 @add_cshl(i32 %a, i32 %b) {
; CHECK-LABEL: add_cshl:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    v_add_lshl_u32 v0, v0, v1, v1
; CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GISEL-LABEL: add_cshl:
; GISEL:       ; %bb.0:
; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-NEXT:    v_and_b32_e32 v2, 31, v1
; GISEL-NEXT:    v_add_lshl_u32 v0, v0, v1, v2
; GISEL-NEXT:    s_setpc_b64 s[30:31]
  %add = add i32 %a, %b
  %and = and i32 %b, 31
  %shl = shl i32 %add, %and
  ret i32 %shl
}