; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck --check-prefix=GFX6 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck --check-prefix=GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck --check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck --check-prefix=GFX10 %s

define i8 @v_saddsat_i8(i8 %lhs, i8 %rhs) {
; GFX6-LABEL: v_saddsat_i8:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 8
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; GFX6-NEXT: v_min_i32_e32 v0, 0x7f, v0
; GFX6-NEXT: v_max_i32_e32 v0, 0xffffff80, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_saddsat_i8:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u16_sdwa v0, sext(v0), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX8-NEXT: v_min_i16_e32 v0, 0x7f, v0
; GFX8-NEXT: v_max_i16_e32 v0, 0xff80, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_saddsat_i8:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1
; GFX9-NEXT: v_lshlrev_b16_e32 v0, 8, v0
; GFX9-NEXT: v_add_i16 v0, v0, v1 clamp
; GFX9-NEXT: v_ashrrev_i16_e32 v0, 8, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_saddsat_i8:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_lshlrev_b16 v1, 8, v1
; GFX10-NEXT: v_lshlrev_b16 v0, 8, v0
; GFX10-NEXT: v_add_nc_i16 v0, v0, v1 clamp
; GFX10-NEXT: v_ashrrev_i16 v0, 8, v0
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %result = call i8 @llvm.sadd.sat.i8(i8 %lhs, i8 %rhs)
  ret i8 %result
}

define i16 @v_saddsat_i16(i16 %lhs, i16 %rhs) {
; GFX6-LABEL: v_saddsat_i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; GFX6-NEXT: v_min_i32_e32 v0, 0x7fff, v0
; GFX6-NEXT: v_max_i32_e32 v0, 0xffff8000, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_saddsat_i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_cmp_gt_i16_e32 vcc, 0, v1
; GFX8-NEXT: v_add_u16_e32 v1, v0, v1
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v1, v0
; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v1
; GFX8-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_saddsat_i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_i16 v0, v0, v1 clamp
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_saddsat_i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_i16 v0, v0, v1 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %result = call i16 @llvm.sadd.sat.i16(i16 %lhs, i16 %rhs)
  ret i16 %result
}

define i32 @v_saddsat_i32(i32 %lhs, i32 %rhs) {
; GFX6-LABEL: v_saddsat_i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_cmp_gt_i32_e32 vcc, 0, v1
; GFX6-NEXT: v_add_i32_e64 v1, s[4:5], v0, v1
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v1, v0
; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v1
; GFX6-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_saddsat_i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_cmp_gt_i32_e32 vcc, 0, v1
; GFX8-NEXT: v_add_u32_e64 v1, s[4:5], v0, v1
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v1, v0
; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v1
; GFX8-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_saddsat_i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_i32 v0, v0, v1 clamp
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_saddsat_i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_i32 v0, v0, v1 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %result = call i32 @llvm.sadd.sat.i32(i32 %lhs, i32 %rhs)
  ret i32 %result
}

define <2 x i16> @v_saddsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
; GFX6-LABEL: v_saddsat_v2i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16
; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v3
; GFX6-NEXT: s_movk_i32 s4, 0x7fff
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GFX6-NEXT: v_min_i32_e32 v1, s4, v1
; GFX6-NEXT: s_movk_i32 s5, 0x8000
; GFX6-NEXT: v_min_i32_e32 v0, s4, v0
; GFX6-NEXT: v_max_i32_e32 v1, s5, v1
; GFX6-NEXT: v_max_i32_e32 v0, s5, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_saddsat_v2i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0
; GFX8-NEXT: v_add_u16_e32 v4, v3, v2
; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v4, v3
; GFX8-NEXT: v_cmp_gt_i16_e64 s[4:5], 0, v2
; GFX8-NEXT: v_ashrrev_i16_e32 v2, 15, v4
; GFX8-NEXT: s_movk_i32 s6, 0x8000
; GFX8-NEXT: v_xor_b32_e32 v2, s6, v2
; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GFX8-NEXT: v_cmp_gt_i16_e32 vcc, 0, v1
; GFX8-NEXT: v_add_u16_e32 v1, v0, v1
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v1, v0
; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v1
; GFX8-NEXT: v_xor_b32_e32 v0, s6, v0
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_saddsat_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_add_i16 v0, v0, v1 clamp
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_saddsat_v2i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_add_i16 v0, v0, v1 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %result = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> %lhs, <2 x i16> %rhs)
  ret <2 x i16> %result
}

define <3 x i16> @v_saddsat_v3i16(<3 x i16> %lhs, <3 x i16> %rhs) {
; GFX6-LABEL: v_saddsat_v3i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX6-NEXT: v_bfe_i32 v4, v4, 0, 16
; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
; GFX6-NEXT: v_bfe_i32 v5, v5, 0, 16
; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16
; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v4
; GFX6-NEXT: s_movk_i32 s4, 0x7fff
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v3
; GFX6-NEXT: v_min_i32_e32 v1, s4, v1
; GFX6-NEXT: s_movk_i32 s5, 0x8000
; GFX6-NEXT: v_min_i32_e32 v0, s4, v0
; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v5
; GFX6-NEXT: v_max_i32_e32 v1, s5, v1
; GFX6-NEXT: v_max_i32_e32 v0, s5, v0
; GFX6-NEXT: v_min_i32_e32 v2, s4, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_max_i32_e32 v3, s5, v2
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_or_b32_e32 v2, 0xffff0000, v3
; GFX6-NEXT: v_alignbit_b32 v1, v3, v1, 16
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_saddsat_v3i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; GFX8-NEXT: v_add_u16_e32 v6, v5, v4
; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v6, v5
; GFX8-NEXT: v_cmp_gt_i16_e64 s[4:5], 0, v4
; GFX8-NEXT: v_ashrrev_i16_e32 v4, 15, v6
; GFX8-NEXT: s_movk_i32 s6, 0x8000
; GFX8-NEXT: v_xor_b32_e32 v4, s6, v4
; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
; GFX8-NEXT: v_cmp_gt_i16_e32 vcc, 0, v3
; GFX8-NEXT: v_add_u16_e32 v3, v1, v3
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v3, v1
; GFX8-NEXT: v_ashrrev_i16_e32 v1, 15, v3
; GFX8-NEXT: v_xor_b32_e32 v1, s6, v1
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX8-NEXT: v_cmp_gt_i16_e32 vcc, 0, v2
; GFX8-NEXT: v_add_u16_e32 v2, v0, v2
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v2, v0
; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v2
; GFX8-NEXT: v_xor_b32_e32 v0, s6, v0
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v4
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_saddsat_v3i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_add_i16 v1, v1, v3 clamp
; GFX9-NEXT: v_pk_add_i16 v0, v0, v2 clamp
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_saddsat_v3i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_add_i16 v0, v0, v2 clamp
; GFX10-NEXT: v_pk_add_i16 v1, v1, v3 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %result = call <3 x i16> @llvm.sadd.sat.v3i16(<3 x i16> %lhs, <3 x i16> %rhs)
  ret <3 x i16> %result
}

define <2 x float> @v_saddsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; GFX6-LABEL: v_saddsat_v4i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_bfe_i32 v4, v4, 0, 16
; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16
; GFX6-NEXT: v_bfe_i32 v5, v5, 0, 16
; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16
; GFX6-NEXT: v_add_i32_e32 v1, vcc, v1, v5
; GFX6-NEXT: s_movk_i32 s4, 0x7fff
; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v4
; GFX6-NEXT: v_min_i32_e32 v1, s4, v1
; GFX6-NEXT: s_movk_i32 s5, 0x8000
; GFX6-NEXT: v_min_i32_e32 v0, s4, v0
; GFX6-NEXT: v_max_i32_e32 v1, s5, v1
; GFX6-NEXT: v_max_i32_e32 v0, s5, v0
; GFX6-NEXT: s_mov_b32 s6, 0xffff
; GFX6-NEXT: v_bfe_i32 v6, v6, 0, 16
; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16
; GFX6-NEXT: v_bfe_i32 v7, v7, 0, 16
; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_and_b32_e32 v0, s6, v0
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_add_i32_e32 v1, vcc, v3, v7
; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v6
; GFX6-NEXT: v_min_i32_e32 v1, s4, v1
; GFX6-NEXT: v_min_i32_e32 v2, s4, v2
; GFX6-NEXT: v_max_i32_e32 v1, s5, v1
; GFX6-NEXT: v_max_i32_e32 v2, s5, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_and_b32_e32 v2, s6, v2
; GFX6-NEXT: v_or_b32_e32 v1, v2, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_saddsat_v4i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v2
; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v0
; GFX8-NEXT: v_add_u16_e32 v6, v5, v4
; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v6, v5
; GFX8-NEXT: v_cmp_gt_i16_e64 s[4:5], 0, v4
; GFX8-NEXT: v_ashrrev_i16_e32 v4, 15, v6
; GFX8-NEXT: s_movk_i32 s6, 0x8000
; GFX8-NEXT: v_xor_b32_e32 v4, s6, v4
; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc
; GFX8-NEXT: v_cmp_gt_i16_e32 vcc, 0, v2
; GFX8-NEXT: v_add_u16_e32 v2, v0, v2
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v2, v0
; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v2
; GFX8-NEXT: v_xor_b32_e32 v0, s6, v0
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v3
; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v1
; GFX8-NEXT: v_add_u16_e32 v5, v4, v2
; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v5, v4
; GFX8-NEXT: v_cmp_gt_i16_e64 s[4:5], 0, v2
; GFX8-NEXT: v_ashrrev_i16_e32 v2, 15, v5
; GFX8-NEXT: v_xor_b32_e32 v2, s6, v2
; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
; GFX8-NEXT: v_cmp_gt_i16_e32 vcc, 0, v3
; GFX8-NEXT: v_add_u16_e32 v3, v1, v3
; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v3, v1
; GFX8-NEXT: v_ashrrev_i16_e32 v1, 15, v3
; GFX8-NEXT: v_xor_b32_e32 v1, s6, v1
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_saddsat_v4i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_add_i16 v0, v0, v2 clamp
; GFX9-NEXT: v_pk_add_i16 v1, v1, v3 clamp
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_saddsat_v4i16:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_pk_add_i16 v0, v0, v2 clamp
; GFX10-NEXT: v_pk_add_i16 v1, v1, v3 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %result = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  %cast = bitcast <4 x i16> %result to <2 x float>
  ret <2 x float> %cast
}

define <2 x i32> @v_saddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; GFX6-LABEL: v_saddsat_v2i32:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_cmp_gt_i32_e32 vcc, 0, v2
; GFX6-NEXT: v_add_i32_e64 v2, s[4:5], v0, v2
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v0
; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v2
; GFX6-NEXT: s_brev_b32 s6, 1
; GFX6-NEXT: v_xor_b32_e32 v0, s6, v0
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-NEXT: v_add_i32_e64 v2, s[4:5], v1, v3
; GFX6-NEXT: v_cmp_gt_i32_e32 vcc, 0, v3
; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v1
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v2
; GFX6-NEXT: v_xor_b32_e32 v1, s6, v1
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX6-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_saddsat_v2i32:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_cmp_gt_i32_e32 vcc, 0, v2
; GFX8-NEXT: v_add_u32_e64 v2, s[4:5], v0, v2
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v0
; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v2
; GFX8-NEXT: s_brev_b32 s6, 1
; GFX8-NEXT: v_xor_b32_e32 v0, s6, v0
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX8-NEXT: v_add_u32_e64 v2, s[4:5], v1, v3
; GFX8-NEXT: v_cmp_gt_i32_e32 vcc, 0, v3
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v1
; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v2
; GFX8-NEXT: v_xor_b32_e32 v1, s6, v1
; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_saddsat_v2i32:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_i32 v0, v0, v2 clamp
; GFX9-NEXT: v_add_i32 v1, v1, v3 clamp
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_saddsat_v2i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_nc_i32 v0, v0, v2 clamp
; GFX10-NEXT: v_add_nc_i32 v1, v1, v3 clamp
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %result = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %result
}

define i64 @v_saddsat_i64(i64 %lhs, i64 %rhs) {
; GFX6-LABEL: v_saddsat_i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_add_i32_e32 v4, vcc, v0, v2
; GFX6-NEXT: v_addc_u32_e32 v5, vcc, v1, v3, vcc
; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
; GFX6-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[2:3]
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v5
; GFX6-NEXT: s_xor_b64 vcc, s[4:5], vcc
; GFX6-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX6-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX6-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_saddsat_i64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_add_u32_e32 v4, vcc, v0, v2
; GFX8-NEXT: v_addc_u32_e32 v5, vcc, v1, v3, vcc
; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
; GFX8-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[2:3]
; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v5
; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX8-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_saddsat_i64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v0, v2
; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, v1, v3, vcc
; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
; GFX9-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[2:3]
; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v5
; GFX9-NEXT: s_xor_b64 vcc, s[4:5], vcc
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_saddsat_i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_add_co_u32 v4, vcc_lo, v0, v2
; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
; GFX10-NEXT: v_cmp_gt_i64_e64 s4, 0, v[2:3]
; GFX10-NEXT: v_ashrrev_i32_e32 v6, 31, v5
; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[4:5], v[0:1]
; GFX10-NEXT: v_xor_b32_e32 v1, 0x80000000, v6
; GFX10-NEXT: s_xor_b32 vcc_lo, s4, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v6, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %result = call i64 @llvm.sadd.sat.i64(i64 %lhs, i64 %rhs)
  ret i64 %result
}

declare i8 @llvm.sadd.sat.i8(i8, i8) #0
declare i16 @llvm.sadd.sat.i16(i16, i16) #0
declare <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16>, <2 x i16>) #0
declare <3 x i16> @llvm.sadd.sat.v3i16(<3 x i16>, <3 x i16>) #0
declare <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16>, <4 x i16>) #0
declare i32 @llvm.sadd.sat.i32(i32, i32) #0
declare <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32>, <2 x i32>) #0
declare i64 @llvm.sadd.sat.i64(i64, i64) #0