1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck --check-prefix=GFX6 %s 3; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck --check-prefix=GFX8 %s 4; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck --check-prefix=GFX9 %s 5; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck --check-prefix=GFX10 %s 6 7define i8 @v_ssubsat_i8(i8 %lhs, i8 %rhs) { 8; GFX6-LABEL: v_ssubsat_i8: 9; GFX6: ; %bb.0: 10; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 8 12; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8 13; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 14; GFX6-NEXT: v_min_i32_e32 v0, 0x7f, v0 15; GFX6-NEXT: v_max_i32_e32 v0, 0xffffff80, v0 16; GFX6-NEXT: s_setpc_b64 s[30:31] 17; 18; GFX8-LABEL: v_ssubsat_i8: 19; GFX8: ; %bb.0: 20; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21; GFX8-NEXT: v_sub_u16_sdwa v0, sext(v0), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 22; GFX8-NEXT: v_min_i16_e32 v0, 0x7f, v0 23; GFX8-NEXT: v_max_i16_e32 v0, 0xff80, v0 24; GFX8-NEXT: s_setpc_b64 s[30:31] 25; 26; GFX9-LABEL: v_ssubsat_i8: 27; GFX9: ; %bb.0: 28; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 29; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1 30; GFX9-NEXT: v_lshlrev_b16_e32 v0, 8, v0 31; GFX9-NEXT: v_sub_i16 v0, v0, v1 clamp 32; GFX9-NEXT: v_ashrrev_i16_e32 v0, 8, v0 33; GFX9-NEXT: s_setpc_b64 s[30:31] 34; 35; GFX10-LABEL: v_ssubsat_i8: 36; GFX10: ; %bb.0: 37; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 38; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 39; GFX10-NEXT: v_lshlrev_b16 v1, 8, v1 40; GFX10-NEXT: v_lshlrev_b16 v0, 8, v0 41; GFX10-NEXT: v_sub_nc_i16 v0, v0, v1 clamp 42; GFX10-NEXT: v_ashrrev_i16 v0, 8, v0 43; GFX10-NEXT: s_setpc_b64 s[30:31] 44 %result = call i8 @llvm.ssub.sat.i8(i8 %lhs, i8 %rhs) 45 ret i8 %result 46} 47 48define i16 @v_ssubsat_i16(i16 %lhs, i16 %rhs) { 
49; GFX6-LABEL: v_ssubsat_i16: 50; GFX6: ; %bb.0: 51; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 52; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16 53; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16 54; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 55; GFX6-NEXT: v_min_i32_e32 v0, 0x7fff, v0 56; GFX6-NEXT: v_max_i32_e32 v0, 0xffff8000, v0 57; GFX6-NEXT: s_setpc_b64 s[30:31] 58; 59; GFX8-LABEL: v_ssubsat_i16: 60; GFX8: ; %bb.0: 61; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 62; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v1 63; GFX8-NEXT: v_sub_u16_e32 v1, v0, v1 64; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v1, v0 65; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v1 66; GFX8-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0 67; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 68; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 69; GFX8-NEXT: s_setpc_b64 s[30:31] 70; 71; GFX9-LABEL: v_ssubsat_i16: 72; GFX9: ; %bb.0: 73; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 74; GFX9-NEXT: v_sub_i16 v0, v0, v1 clamp 75; GFX9-NEXT: s_setpc_b64 s[30:31] 76; 77; GFX10-LABEL: v_ssubsat_i16: 78; GFX10: ; %bb.0: 79; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 80; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 81; GFX10-NEXT: v_sub_nc_i16 v0, v0, v1 clamp 82; GFX10-NEXT: s_setpc_b64 s[30:31] 83 %result = call i16 @llvm.ssub.sat.i16(i16 %lhs, i16 %rhs) 84 ret i16 %result 85} 86 87define i32 @v_ssubsat_i32(i32 %lhs, i32 %rhs) { 88; GFX6-LABEL: v_ssubsat_i32: 89; GFX6: ; %bb.0: 90; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 91; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v1 92; GFX6-NEXT: v_sub_i32_e64 v1, s[4:5], v0, v1 93; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v1, v0 94; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v1 95; GFX6-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 96; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 97; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 98; GFX6-NEXT: s_setpc_b64 s[30:31] 99; 100; GFX8-LABEL: v_ssubsat_i32: 101; GFX8: ; %bb.0: 102; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 103; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v1 104; 
GFX8-NEXT: v_sub_u32_e64 v1, s[4:5], v0, v1 105; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v1, v0 106; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v1 107; GFX8-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 108; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 109; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 110; GFX8-NEXT: s_setpc_b64 s[30:31] 111; 112; GFX9-LABEL: v_ssubsat_i32: 113; GFX9: ; %bb.0: 114; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 115; GFX9-NEXT: v_sub_i32 v0, v0, v1 clamp 116; GFX9-NEXT: s_setpc_b64 s[30:31] 117; 118; GFX10-LABEL: v_ssubsat_i32: 119; GFX10: ; %bb.0: 120; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 121; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 122; GFX10-NEXT: v_sub_nc_i32 v0, v0, v1 clamp 123; GFX10-NEXT: s_setpc_b64 s[30:31] 124 %result = call i32 @llvm.ssub.sat.i32(i32 %lhs, i32 %rhs) 125 ret i32 %result 126} 127 128define <2 x i16> @v_ssubsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) { 129; GFX6-LABEL: v_ssubsat_v2i16: 130; GFX6: ; %bb.0: 131; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 132; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16 133; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16 134; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16 135; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16 136; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v3 137; GFX6-NEXT: s_movk_i32 s4, 0x7fff 138; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 139; GFX6-NEXT: v_min_i32_e32 v1, s4, v1 140; GFX6-NEXT: s_movk_i32 s5, 0x8000 141; GFX6-NEXT: v_min_i32_e32 v0, s4, v0 142; GFX6-NEXT: v_max_i32_e32 v1, s5, v1 143; GFX6-NEXT: v_max_i32_e32 v0, s5, v0 144; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 145; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 146; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 147; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0 148; GFX6-NEXT: s_setpc_b64 s[30:31] 149; 150; GFX8-LABEL: v_ssubsat_v2i16: 151; GFX8: ; %bb.0: 152; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 153; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1 154; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0 155; GFX8-NEXT: v_sub_u16_e32 v4, v3, v2 156; GFX8-NEXT: 
v_cmp_lt_i16_e32 vcc, v4, v3 157; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], 0, v2 158; GFX8-NEXT: v_ashrrev_i16_e32 v2, 15, v4 159; GFX8-NEXT: s_movk_i32 s6, 0x8000 160; GFX8-NEXT: v_xor_b32_e32 v2, s6, v2 161; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc 162; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc 163; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v1 164; GFX8-NEXT: v_sub_u16_e32 v1, v0, v1 165; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v1, v0 166; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v1 167; GFX8-NEXT: v_xor_b32_e32 v0, s6, v0 168; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 169; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2 170; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 171; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 172; GFX8-NEXT: s_setpc_b64 s[30:31] 173; 174; GFX9-LABEL: v_ssubsat_v2i16: 175; GFX9: ; %bb.0: 176; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 177; GFX9-NEXT: v_pk_sub_i16 v0, v0, v1 clamp 178; GFX9-NEXT: s_setpc_b64 s[30:31] 179; 180; GFX10-LABEL: v_ssubsat_v2i16: 181; GFX10: ; %bb.0: 182; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 183; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 184; GFX10-NEXT: v_pk_sub_i16 v0, v0, v1 clamp 185; GFX10-NEXT: s_setpc_b64 s[30:31] 186 %result = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> %lhs, <2 x i16> %rhs) 187 ret <2 x i16> %result 188} 189 190define <3 x i16> @v_ssubsat_v3i16(<3 x i16> %lhs, <3 x i16> %rhs) { 191; GFX6-LABEL: v_ssubsat_v3i16: 192; GFX6: ; %bb.0: 193; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 194; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16 195; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16 196; GFX6-NEXT: v_bfe_i32 v4, v4, 0, 16 197; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16 198; GFX6-NEXT: v_bfe_i32 v5, v5, 0, 16 199; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16 200; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v4 201; GFX6-NEXT: s_movk_i32 s4, 0x7fff 202; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 203; GFX6-NEXT: v_min_i32_e32 v1, s4, v1 204; GFX6-NEXT: s_movk_i32 s5, 0x8000 205; 
GFX6-NEXT: v_min_i32_e32 v0, s4, v0 206; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v5 207; GFX6-NEXT: v_max_i32_e32 v1, s5, v1 208; GFX6-NEXT: v_max_i32_e32 v0, s5, v0 209; GFX6-NEXT: s_mov_b32 s6, 0xffff 210; GFX6-NEXT: v_min_i32_e32 v2, s4, v2 211; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 212; GFX6-NEXT: v_and_b32_e32 v0, s6, v0 213; GFX6-NEXT: v_max_i32_e32 v3, s5, v2 214; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 215; GFX6-NEXT: v_and_b32_e32 v2, s6, v3 216; GFX6-NEXT: v_alignbit_b32 v1, v3, v1, 16 217; GFX6-NEXT: s_setpc_b64 s[30:31] 218; 219; GFX8-LABEL: v_ssubsat_v3i16: 220; GFX8: ; %bb.0: 221; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 222; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v2 223; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v0 224; GFX8-NEXT: v_sub_u16_e32 v6, v5, v4 225; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v6, v5 226; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], 0, v4 227; GFX8-NEXT: v_ashrrev_i16_e32 v4, 15, v6 228; GFX8-NEXT: s_movk_i32 s6, 0x8000 229; GFX8-NEXT: v_xor_b32_e32 v4, s6, v4 230; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc 231; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc 232; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v3 233; GFX8-NEXT: v_sub_u16_e32 v3, v1, v3 234; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v3, v1 235; GFX8-NEXT: v_ashrrev_i16_e32 v1, 15, v3 236; GFX8-NEXT: v_xor_b32_e32 v1, s6, v1 237; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 238; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc 239; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v2 240; GFX8-NEXT: v_sub_u16_e32 v2, v0, v2 241; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v2, v0 242; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v2 243; GFX8-NEXT: v_xor_b32_e32 v0, s6, v0 244; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 245; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 246; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v4 247; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 248; GFX8-NEXT: s_setpc_b64 s[30:31] 249; 250; GFX9-LABEL: v_ssubsat_v3i16: 251; GFX9: ; %bb.0: 252; GFX9-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 253; GFX9-NEXT: v_pk_sub_i16 v1, v1, v3 clamp 254; GFX9-NEXT: v_pk_sub_i16 v0, v0, v2 clamp 255; GFX9-NEXT: s_setpc_b64 s[30:31] 256; 257; GFX10-LABEL: v_ssubsat_v3i16: 258; GFX10: ; %bb.0: 259; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 260; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 261; GFX10-NEXT: v_pk_sub_i16 v0, v0, v2 clamp 262; GFX10-NEXT: v_pk_sub_i16 v1, v1, v3 clamp 263; GFX10-NEXT: s_setpc_b64 s[30:31] 264 %result = call <3 x i16> @llvm.ssub.sat.v3i16(<3 x i16> %lhs, <3 x i16> %rhs) 265 ret <3 x i16> %result 266} 267 268define <2 x float> @v_ssubsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { 269; GFX6-LABEL: v_ssubsat_v4i16: 270; GFX6: ; %bb.0: 271; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 272; GFX6-NEXT: v_bfe_i32 v4, v4, 0, 16 273; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16 274; GFX6-NEXT: v_bfe_i32 v5, v5, 0, 16 275; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16 276; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v5 277; GFX6-NEXT: s_movk_i32 s4, 0x7fff 278; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 279; GFX6-NEXT: v_min_i32_e32 v1, s4, v1 280; GFX6-NEXT: s_movk_i32 s5, 0x8000 281; GFX6-NEXT: v_min_i32_e32 v0, s4, v0 282; GFX6-NEXT: v_max_i32_e32 v1, s5, v1 283; GFX6-NEXT: v_max_i32_e32 v0, s5, v0 284; GFX6-NEXT: s_mov_b32 s6, 0xffff 285; GFX6-NEXT: v_bfe_i32 v6, v6, 0, 16 286; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16 287; GFX6-NEXT: v_bfe_i32 v7, v7, 0, 16 288; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16 289; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 290; GFX6-NEXT: v_and_b32_e32 v0, s6, v0 291; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 292; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v3, v7 293; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v6 294; GFX6-NEXT: v_min_i32_e32 v1, s4, v1 295; GFX6-NEXT: v_min_i32_e32 v2, s4, v2 296; GFX6-NEXT: v_max_i32_e32 v1, s5, v1 297; GFX6-NEXT: v_max_i32_e32 v2, s5, v2 298; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 299; GFX6-NEXT: v_and_b32_e32 v2, s6, v2 300; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 301; GFX6-NEXT: s_setpc_b64 s[30:31] 302; 303; GFX8-LABEL: 
v_ssubsat_v4i16: 304; GFX8: ; %bb.0: 305; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 306; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v2 307; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v0 308; GFX8-NEXT: v_sub_u16_e32 v6, v5, v4 309; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v6, v5 310; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], 0, v4 311; GFX8-NEXT: v_ashrrev_i16_e32 v4, 15, v6 312; GFX8-NEXT: s_movk_i32 s6, 0x8000 313; GFX8-NEXT: v_xor_b32_e32 v4, s6, v4 314; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc 315; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc 316; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v2 317; GFX8-NEXT: v_sub_u16_e32 v2, v0, v2 318; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v2, v0 319; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v2 320; GFX8-NEXT: v_xor_b32_e32 v0, s6, v0 321; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 322; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v4 323; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 324; GFX8-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 325; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v3 326; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v1 327; GFX8-NEXT: v_sub_u16_e32 v5, v4, v2 328; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v5, v4 329; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], 0, v2 330; GFX8-NEXT: v_ashrrev_i16_e32 v2, 15, v5 331; GFX8-NEXT: v_xor_b32_e32 v2, s6, v2 332; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc 333; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc 334; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v3 335; GFX8-NEXT: v_sub_u16_e32 v3, v1, v3 336; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v3, v1 337; GFX8-NEXT: v_ashrrev_i16_e32 v1, 15, v3 338; GFX8-NEXT: v_xor_b32_e32 v1, s6, v1 339; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 340; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2 341; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc 342; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 343; GFX8-NEXT: s_setpc_b64 s[30:31] 344; 345; GFX9-LABEL: v_ssubsat_v4i16: 346; GFX9: ; %bb.0: 347; GFX9-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 348; GFX9-NEXT: v_pk_sub_i16 v0, v0, v2 clamp 349; GFX9-NEXT: v_pk_sub_i16 v1, v1, v3 clamp 350; GFX9-NEXT: s_setpc_b64 s[30:31] 351; 352; GFX10-LABEL: v_ssubsat_v4i16: 353; GFX10: ; %bb.0: 354; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 355; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 356; GFX10-NEXT: v_pk_sub_i16 v0, v0, v2 clamp 357; GFX10-NEXT: v_pk_sub_i16 v1, v1, v3 clamp 358; GFX10-NEXT: s_setpc_b64 s[30:31] 359 %result = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) 360 %cast = bitcast <4 x i16> %result to <2 x float> 361 ret <2 x float> %cast 362} 363 364define <2 x i32> @v_ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { 365; GFX6-LABEL: v_ssubsat_v2i32: 366; GFX6: ; %bb.0: 367; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 368; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v2 369; GFX6-NEXT: v_sub_i32_e64 v2, s[4:5], v0, v2 370; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v0 371; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v2 372; GFX6-NEXT: s_brev_b32 s6, 1 373; GFX6-NEXT: v_xor_b32_e32 v0, s6, v0 374; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 375; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 376; GFX6-NEXT: v_sub_i32_e64 v2, s[4:5], v1, v3 377; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v3 378; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v1 379; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v2 380; GFX6-NEXT: v_xor_b32_e32 v1, s6, v1 381; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 382; GFX6-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc 383; GFX6-NEXT: s_setpc_b64 s[30:31] 384; 385; GFX8-LABEL: v_ssubsat_v2i32: 386; GFX8: ; %bb.0: 387; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 388; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v2 389; GFX8-NEXT: v_sub_u32_e64 v2, s[4:5], v0, v2 390; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v0 391; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v2 392; GFX8-NEXT: s_brev_b32 s6, 1 393; GFX8-NEXT: v_xor_b32_e32 v0, s6, v0 394; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 395; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 396; GFX8-NEXT: 
v_sub_u32_e64 v2, s[4:5], v1, v3 397; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v3 398; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v1 399; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v2 400; GFX8-NEXT: v_xor_b32_e32 v1, s6, v1 401; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 402; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc 403; GFX8-NEXT: s_setpc_b64 s[30:31] 404; 405; GFX9-LABEL: v_ssubsat_v2i32: 406; GFX9: ; %bb.0: 407; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 408; GFX9-NEXT: v_sub_i32 v0, v0, v2 clamp 409; GFX9-NEXT: v_sub_i32 v1, v1, v3 clamp 410; GFX9-NEXT: s_setpc_b64 s[30:31] 411; 412; GFX10-LABEL: v_ssubsat_v2i32: 413; GFX10: ; %bb.0: 414; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 415; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 416; GFX10-NEXT: v_sub_nc_i32 v0, v0, v2 clamp 417; GFX10-NEXT: v_sub_nc_i32 v1, v1, v3 clamp 418; GFX10-NEXT: s_setpc_b64 s[30:31] 419 %result = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) 420 ret <2 x i32> %result 421} 422 423define <3 x i32> @v_ssubsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) { 424; GFX6-LABEL: v_ssubsat_v3i32: 425; GFX6: ; %bb.0: 426; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 427; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v3 428; GFX6-NEXT: v_sub_i32_e64 v3, s[4:5], v0, v3 429; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v0 430; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v3 431; GFX6-NEXT: s_brev_b32 s6, 1 432; GFX6-NEXT: v_xor_b32_e32 v0, s6, v0 433; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 434; GFX6-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc 435; GFX6-NEXT: v_sub_i32_e64 v3, s[4:5], v1, v4 436; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v4 437; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v1 438; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v3 439; GFX6-NEXT: v_xor_b32_e32 v1, s6, v1 440; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 441; GFX6-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc 442; GFX6-NEXT: v_sub_i32_e64 v3, s[4:5], v2, v5 443; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v5 444; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v2 445; GFX6-NEXT: 
v_ashrrev_i32_e32 v2, 31, v3 446; GFX6-NEXT: v_xor_b32_e32 v2, s6, v2 447; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 448; GFX6-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc 449; GFX6-NEXT: s_setpc_b64 s[30:31] 450; 451; GFX8-LABEL: v_ssubsat_v3i32: 452; GFX8: ; %bb.0: 453; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 454; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v3 455; GFX8-NEXT: v_sub_u32_e64 v3, s[4:5], v0, v3 456; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v0 457; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v3 458; GFX8-NEXT: s_brev_b32 s6, 1 459; GFX8-NEXT: v_xor_b32_e32 v0, s6, v0 460; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 461; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc 462; GFX8-NEXT: v_sub_u32_e64 v3, s[4:5], v1, v4 463; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v4 464; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v1 465; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v3 466; GFX8-NEXT: v_xor_b32_e32 v1, s6, v1 467; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 468; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc 469; GFX8-NEXT: v_sub_u32_e64 v3, s[4:5], v2, v5 470; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v5 471; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v2 472; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v3 473; GFX8-NEXT: v_xor_b32_e32 v2, s6, v2 474; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 475; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc 476; GFX8-NEXT: s_setpc_b64 s[30:31] 477; 478; GFX9-LABEL: v_ssubsat_v3i32: 479; GFX9: ; %bb.0: 480; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 481; GFX9-NEXT: v_sub_i32 v0, v0, v3 clamp 482; GFX9-NEXT: v_sub_i32 v1, v1, v4 clamp 483; GFX9-NEXT: v_sub_i32 v2, v2, v5 clamp 484; GFX9-NEXT: s_setpc_b64 s[30:31] 485; 486; GFX10-LABEL: v_ssubsat_v3i32: 487; GFX10: ; %bb.0: 488; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 489; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 490; GFX10-NEXT: v_sub_nc_i32 v0, v0, v3 clamp 491; GFX10-NEXT: v_sub_nc_i32 v1, v1, v4 clamp 492; GFX10-NEXT: v_sub_nc_i32 v2, v2, v5 clamp 493; GFX10-NEXT: s_setpc_b64 s[30:31] 494 %result = call <3 x i32> 
@llvm.ssub.sat.v3i32(<3 x i32> %lhs, <3 x i32> %rhs) 495 ret <3 x i32> %result 496} 497 498define <4 x i32> @v_ssubsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { 499; GFX6-LABEL: v_ssubsat_v4i32: 500; GFX6: ; %bb.0: 501; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 502; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v4 503; GFX6-NEXT: v_sub_i32_e64 v4, s[4:5], v0, v4 504; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v0 505; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v4 506; GFX6-NEXT: s_brev_b32 s6, 1 507; GFX6-NEXT: v_xor_b32_e32 v0, s6, v0 508; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 509; GFX6-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc 510; GFX6-NEXT: v_sub_i32_e64 v4, s[4:5], v1, v5 511; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v5 512; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v1 513; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v4 514; GFX6-NEXT: v_xor_b32_e32 v1, s6, v1 515; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 516; GFX6-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc 517; GFX6-NEXT: v_sub_i32_e64 v4, s[4:5], v2, v6 518; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v6 519; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v2 520; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v4 521; GFX6-NEXT: v_xor_b32_e32 v2, s6, v2 522; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 523; GFX6-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc 524; GFX6-NEXT: v_sub_i32_e64 v4, s[4:5], v3, v7 525; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v7 526; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v3 527; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v4 528; GFX6-NEXT: v_xor_b32_e32 v3, s6, v3 529; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 530; GFX6-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc 531; GFX6-NEXT: s_setpc_b64 s[30:31] 532; 533; GFX8-LABEL: v_ssubsat_v4i32: 534; GFX8: ; %bb.0: 535; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 536; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v4 537; GFX8-NEXT: v_sub_u32_e64 v4, s[4:5], v0, v4 538; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v0 539; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v4 540; GFX8-NEXT: s_brev_b32 s6, 1 541; GFX8-NEXT: v_xor_b32_e32 v0, s6, v0 542; 
GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 543; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc 544; GFX8-NEXT: v_sub_u32_e64 v4, s[4:5], v1, v5 545; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v5 546; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v1 547; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v4 548; GFX8-NEXT: v_xor_b32_e32 v1, s6, v1 549; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 550; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc 551; GFX8-NEXT: v_sub_u32_e64 v4, s[4:5], v2, v6 552; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v6 553; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v2 554; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v4 555; GFX8-NEXT: v_xor_b32_e32 v2, s6, v2 556; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 557; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc 558; GFX8-NEXT: v_sub_u32_e64 v4, s[4:5], v3, v7 559; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v7 560; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v3 561; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v4 562; GFX8-NEXT: v_xor_b32_e32 v3, s6, v3 563; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 564; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc 565; GFX8-NEXT: s_setpc_b64 s[30:31] 566; 567; GFX9-LABEL: v_ssubsat_v4i32: 568; GFX9: ; %bb.0: 569; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 570; GFX9-NEXT: v_sub_i32 v0, v0, v4 clamp 571; GFX9-NEXT: v_sub_i32 v1, v1, v5 clamp 572; GFX9-NEXT: v_sub_i32 v2, v2, v6 clamp 573; GFX9-NEXT: v_sub_i32 v3, v3, v7 clamp 574; GFX9-NEXT: s_setpc_b64 s[30:31] 575; 576; GFX10-LABEL: v_ssubsat_v4i32: 577; GFX10: ; %bb.0: 578; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 579; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 580; GFX10-NEXT: v_sub_nc_i32 v0, v0, v4 clamp 581; GFX10-NEXT: v_sub_nc_i32 v1, v1, v5 clamp 582; GFX10-NEXT: v_sub_nc_i32 v2, v2, v6 clamp 583; GFX10-NEXT: v_sub_nc_i32 v3, v3, v7 clamp 584; GFX10-NEXT: s_setpc_b64 s[30:31] 585 %result = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) 586 ret <4 x i32> %result 587} 588 589define <8 x i32> @v_ssubsat_v8i32(<8 x i32> %lhs, <8 x i32> %rhs) { 590; 
GFX6-LABEL: v_ssubsat_v8i32: 591; GFX6: ; %bb.0: 592; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 593; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v8 594; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v0, v8 595; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v0 596; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v8 597; GFX6-NEXT: s_brev_b32 s6, 1 598; GFX6-NEXT: v_xor_b32_e32 v0, s6, v0 599; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 600; GFX6-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc 601; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v1, v9 602; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v9 603; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v1 604; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v8 605; GFX6-NEXT: v_xor_b32_e32 v1, s6, v1 606; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 607; GFX6-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc 608; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v2, v10 609; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v10 610; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v2 611; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v8 612; GFX6-NEXT: v_xor_b32_e32 v2, s6, v2 613; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 614; GFX6-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc 615; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v3, v11 616; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v11 617; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v3 618; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v8 619; GFX6-NEXT: v_xor_b32_e32 v3, s6, v3 620; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 621; GFX6-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc 622; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v4, v12 623; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v12 624; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v4 625; GFX6-NEXT: v_ashrrev_i32_e32 v4, 31, v8 626; GFX6-NEXT: v_xor_b32_e32 v4, s6, v4 627; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 628; GFX6-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc 629; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v5, v13 630; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v13 631; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v5 632; GFX6-NEXT: v_ashrrev_i32_e32 v5, 31, v8 633; GFX6-NEXT: v_xor_b32_e32 v5, s6, v5 634; GFX6-NEXT: s_xor_b64 
vcc, vcc, s[4:5] 635; GFX6-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc 636; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v6, v14 637; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v14 638; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v6 639; GFX6-NEXT: v_ashrrev_i32_e32 v6, 31, v8 640; GFX6-NEXT: v_xor_b32_e32 v6, s6, v6 641; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 642; GFX6-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc 643; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v7, v15 644; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v15 645; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v7 646; GFX6-NEXT: v_ashrrev_i32_e32 v7, 31, v8 647; GFX6-NEXT: v_xor_b32_e32 v7, s6, v7 648; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 649; GFX6-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc 650; GFX6-NEXT: s_setpc_b64 s[30:31] 651; 652; GFX8-LABEL: v_ssubsat_v8i32: 653; GFX8: ; %bb.0: 654; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 655; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v8 656; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v0, v8 657; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v0 658; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v8 659; GFX8-NEXT: s_brev_b32 s6, 1 660; GFX8-NEXT: v_xor_b32_e32 v0, s6, v0 661; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 662; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc 663; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v1, v9 664; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v9 665; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v1 666; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v8 667; GFX8-NEXT: v_xor_b32_e32 v1, s6, v1 668; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 669; GFX8-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc 670; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v2, v10 671; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v10 672; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v2 673; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v8 674; GFX8-NEXT: v_xor_b32_e32 v2, s6, v2 675; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 676; GFX8-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc 677; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v3, v11 678; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v11 679; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, 
v3 680; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v8 681; GFX8-NEXT: v_xor_b32_e32 v3, s6, v3 682; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 683; GFX8-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc 684; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v4, v12 685; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v12 686; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v4 687; GFX8-NEXT: v_ashrrev_i32_e32 v4, 31, v8 688; GFX8-NEXT: v_xor_b32_e32 v4, s6, v4 689; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 690; GFX8-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc 691; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v5, v13 692; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v13 693; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v5 694; GFX8-NEXT: v_ashrrev_i32_e32 v5, 31, v8 695; GFX8-NEXT: v_xor_b32_e32 v5, s6, v5 696; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 697; GFX8-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc 698; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v6, v14 699; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v14 700; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v6 701; GFX8-NEXT: v_ashrrev_i32_e32 v6, 31, v8 702; GFX8-NEXT: v_xor_b32_e32 v6, s6, v6 703; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 704; GFX8-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc 705; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v7, v15 706; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v15 707; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v7 708; GFX8-NEXT: v_ashrrev_i32_e32 v7, 31, v8 709; GFX8-NEXT: v_xor_b32_e32 v7, s6, v7 710; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 711; GFX8-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc 712; GFX8-NEXT: s_setpc_b64 s[30:31] 713; 714; GFX9-LABEL: v_ssubsat_v8i32: 715; GFX9: ; %bb.0: 716; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 717; GFX9-NEXT: v_sub_i32 v0, v0, v8 clamp 718; GFX9-NEXT: v_sub_i32 v1, v1, v9 clamp 719; GFX9-NEXT: v_sub_i32 v2, v2, v10 clamp 720; GFX9-NEXT: v_sub_i32 v3, v3, v11 clamp 721; GFX9-NEXT: v_sub_i32 v4, v4, v12 clamp 722; GFX9-NEXT: v_sub_i32 v5, v5, v13 clamp 723; GFX9-NEXT: v_sub_i32 v6, v6, v14 clamp 724; GFX9-NEXT: v_sub_i32 v7, v7, v15 clamp 725; GFX9-NEXT: s_setpc_b64 
s[30:31] 726; 727; GFX10-LABEL: v_ssubsat_v8i32: 728; GFX10: ; %bb.0: 729; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 730; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 731; GFX10-NEXT: v_sub_nc_i32 v0, v0, v8 clamp 732; GFX10-NEXT: v_sub_nc_i32 v1, v1, v9 clamp 733; GFX10-NEXT: v_sub_nc_i32 v2, v2, v10 clamp 734; GFX10-NEXT: v_sub_nc_i32 v3, v3, v11 clamp 735; GFX10-NEXT: v_sub_nc_i32 v4, v4, v12 clamp 736; GFX10-NEXT: v_sub_nc_i32 v5, v5, v13 clamp 737; GFX10-NEXT: v_sub_nc_i32 v6, v6, v14 clamp 738; GFX10-NEXT: v_sub_nc_i32 v7, v7, v15 clamp 739; GFX10-NEXT: s_setpc_b64 s[30:31] 740 %result = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> %lhs, <8 x i32> %rhs) 741 ret <8 x i32> %result 742} 743 744define <16 x i32> @v_ssubsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) { 745; GFX6-LABEL: v_ssubsat_v16i32: 746; GFX6: ; %bb.0: 747; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 748; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v16 749; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v0, v16 750; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v0 751; GFX6-NEXT: s_brev_b32 s6, 1 752; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v16 753; GFX6-NEXT: v_xor_b32_e32 v0, s6, v0 754; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 755; GFX6-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc 756; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v1, v17 757; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v17 758; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v1 759; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v16 760; GFX6-NEXT: v_xor_b32_e32 v1, s6, v1 761; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 762; GFX6-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc 763; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v2, v18 764; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v18 765; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v2 766; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v16 767; GFX6-NEXT: v_xor_b32_e32 v2, s6, v2 768; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 769; GFX6-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc 770; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v3, v19 771; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v19 
772; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v3 773; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v16 774; GFX6-NEXT: v_xor_b32_e32 v3, s6, v3 775; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 776; GFX6-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc 777; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v4, v20 778; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v20 779; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v4 780; GFX6-NEXT: v_ashrrev_i32_e32 v4, 31, v16 781; GFX6-NEXT: v_xor_b32_e32 v4, s6, v4 782; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 783; GFX6-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc 784; GFX6-NEXT: buffer_load_dword v16, off, s[0:3], s32 785; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v5, v21 786; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v21 787; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v5 788; GFX6-NEXT: v_ashrrev_i32_e32 v5, 31, v17 789; GFX6-NEXT: v_xor_b32_e32 v5, s6, v5 790; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 791; GFX6-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc 792; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v6, v22 793; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v22 794; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v6 795; GFX6-NEXT: v_ashrrev_i32_e32 v6, 31, v17 796; GFX6-NEXT: v_xor_b32_e32 v6, s6, v6 797; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 798; GFX6-NEXT: v_cndmask_b32_e32 v6, v17, v6, vcc 799; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v7, v23 800; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v23 801; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v7 802; GFX6-NEXT: v_ashrrev_i32_e32 v7, 31, v17 803; GFX6-NEXT: v_xor_b32_e32 v7, s6, v7 804; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 805; GFX6-NEXT: v_cndmask_b32_e32 v7, v17, v7, vcc 806; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v8, v24 807; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v24 808; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v8 809; GFX6-NEXT: v_ashrrev_i32_e32 v8, 31, v17 810; GFX6-NEXT: v_xor_b32_e32 v8, s6, v8 811; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 812; GFX6-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc 813; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v9, v25 814; GFX6-NEXT: v_cmp_lt_i32_e32 
vcc, 0, v25 815; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v9 816; GFX6-NEXT: v_ashrrev_i32_e32 v9, 31, v17 817; GFX6-NEXT: v_xor_b32_e32 v9, s6, v9 818; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 819; GFX6-NEXT: v_cndmask_b32_e32 v9, v17, v9, vcc 820; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v10, v26 821; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v26 822; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v10 823; GFX6-NEXT: v_ashrrev_i32_e32 v10, 31, v17 824; GFX6-NEXT: v_xor_b32_e32 v10, s6, v10 825; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 826; GFX6-NEXT: v_cndmask_b32_e32 v10, v17, v10, vcc 827; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v11, v27 828; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v27 829; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v11 830; GFX6-NEXT: v_ashrrev_i32_e32 v11, 31, v17 831; GFX6-NEXT: v_xor_b32_e32 v11, s6, v11 832; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 833; GFX6-NEXT: v_cndmask_b32_e32 v11, v17, v11, vcc 834; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v12, v28 835; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v28 836; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v12 837; GFX6-NEXT: v_ashrrev_i32_e32 v12, 31, v17 838; GFX6-NEXT: v_xor_b32_e32 v12, s6, v12 839; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 840; GFX6-NEXT: v_cndmask_b32_e32 v12, v17, v12, vcc 841; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v13, v29 842; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v29 843; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v13 844; GFX6-NEXT: v_ashrrev_i32_e32 v13, 31, v17 845; GFX6-NEXT: v_xor_b32_e32 v13, s6, v13 846; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 847; GFX6-NEXT: v_cndmask_b32_e32 v13, v17, v13, vcc 848; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v14, v30 849; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v30 850; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v14 851; GFX6-NEXT: v_ashrrev_i32_e32 v14, 31, v17 852; GFX6-NEXT: v_xor_b32_e32 v14, s6, v14 853; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 854; GFX6-NEXT: v_cndmask_b32_e32 v14, v17, v14, vcc 855; GFX6-NEXT: s_waitcnt vmcnt(0) 856; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v16 857; 
GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v15, v16 858; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v15 859; GFX6-NEXT: v_ashrrev_i32_e32 v15, 31, v16 860; GFX6-NEXT: v_xor_b32_e32 v15, s6, v15 861; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 862; GFX6-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc 863; GFX6-NEXT: s_setpc_b64 s[30:31] 864; 865; GFX8-LABEL: v_ssubsat_v16i32: 866; GFX8: ; %bb.0: 867; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 868; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v16 869; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v0, v16 870; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v0 871; GFX8-NEXT: s_brev_b32 s6, 1 872; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v16 873; GFX8-NEXT: v_xor_b32_e32 v0, s6, v0 874; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 875; GFX8-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc 876; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v1, v17 877; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v17 878; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v1 879; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v16 880; GFX8-NEXT: v_xor_b32_e32 v1, s6, v1 881; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 882; GFX8-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc 883; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v2, v18 884; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v18 885; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v2 886; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v16 887; GFX8-NEXT: v_xor_b32_e32 v2, s6, v2 888; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 889; GFX8-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc 890; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v3, v19 891; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v19 892; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v3 893; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v16 894; GFX8-NEXT: v_xor_b32_e32 v3, s6, v3 895; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 896; GFX8-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc 897; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v4, v20 898; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v20 899; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v4 900; GFX8-NEXT: v_ashrrev_i32_e32 v4, 31, v16 901; GFX8-NEXT: v_xor_b32_e32 v4, 
s6, v4 902; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 903; GFX8-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc 904; GFX8-NEXT: buffer_load_dword v16, off, s[0:3], s32 905; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v5, v21 906; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v21 907; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v5 908; GFX8-NEXT: v_ashrrev_i32_e32 v5, 31, v17 909; GFX8-NEXT: v_xor_b32_e32 v5, s6, v5 910; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 911; GFX8-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc 912; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v6, v22 913; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v22 914; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v6 915; GFX8-NEXT: v_ashrrev_i32_e32 v6, 31, v17 916; GFX8-NEXT: v_xor_b32_e32 v6, s6, v6 917; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 918; GFX8-NEXT: v_cndmask_b32_e32 v6, v17, v6, vcc 919; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v7, v23 920; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v23 921; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v7 922; GFX8-NEXT: v_ashrrev_i32_e32 v7, 31, v17 923; GFX8-NEXT: v_xor_b32_e32 v7, s6, v7 924; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 925; GFX8-NEXT: v_cndmask_b32_e32 v7, v17, v7, vcc 926; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v8, v24 927; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v24 928; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v8 929; GFX8-NEXT: v_ashrrev_i32_e32 v8, 31, v17 930; GFX8-NEXT: v_xor_b32_e32 v8, s6, v8 931; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 932; GFX8-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc 933; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v9, v25 934; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v25 935; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v9 936; GFX8-NEXT: v_ashrrev_i32_e32 v9, 31, v17 937; GFX8-NEXT: v_xor_b32_e32 v9, s6, v9 938; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 939; GFX8-NEXT: v_cndmask_b32_e32 v9, v17, v9, vcc 940; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v10, v26 941; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v26 942; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v10 943; GFX8-NEXT: v_ashrrev_i32_e32 v10, 31, v17 944; GFX8-NEXT: 
v_xor_b32_e32 v10, s6, v10 945; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 946; GFX8-NEXT: v_cndmask_b32_e32 v10, v17, v10, vcc 947; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v11, v27 948; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v27 949; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v11 950; GFX8-NEXT: v_ashrrev_i32_e32 v11, 31, v17 951; GFX8-NEXT: v_xor_b32_e32 v11, s6, v11 952; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 953; GFX8-NEXT: v_cndmask_b32_e32 v11, v17, v11, vcc 954; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v12, v28 955; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v28 956; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v12 957; GFX8-NEXT: v_ashrrev_i32_e32 v12, 31, v17 958; GFX8-NEXT: v_xor_b32_e32 v12, s6, v12 959; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 960; GFX8-NEXT: v_cndmask_b32_e32 v12, v17, v12, vcc 961; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v13, v29 962; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v29 963; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v13 964; GFX8-NEXT: v_ashrrev_i32_e32 v13, 31, v17 965; GFX8-NEXT: v_xor_b32_e32 v13, s6, v13 966; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 967; GFX8-NEXT: v_cndmask_b32_e32 v13, v17, v13, vcc 968; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v14, v30 969; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v30 970; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v14 971; GFX8-NEXT: v_ashrrev_i32_e32 v14, 31, v17 972; GFX8-NEXT: v_xor_b32_e32 v14, s6, v14 973; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 974; GFX8-NEXT: v_cndmask_b32_e32 v14, v17, v14, vcc 975; GFX8-NEXT: s_waitcnt vmcnt(0) 976; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v16 977; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v15, v16 978; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v15 979; GFX8-NEXT: v_ashrrev_i32_e32 v15, 31, v16 980; GFX8-NEXT: v_xor_b32_e32 v15, s6, v15 981; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 982; GFX8-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc 983; GFX8-NEXT: s_setpc_b64 s[30:31] 984; 985; GFX9-LABEL: v_ssubsat_v16i32: 986; GFX9: ; %bb.0: 987; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 988; GFX9-NEXT: 
v_sub_i32 v0, v0, v16 clamp 989; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 990; GFX9-NEXT: v_sub_i32 v1, v1, v17 clamp 991; GFX9-NEXT: v_sub_i32 v2, v2, v18 clamp 992; GFX9-NEXT: v_sub_i32 v3, v3, v19 clamp 993; GFX9-NEXT: v_sub_i32 v4, v4, v20 clamp 994; GFX9-NEXT: v_sub_i32 v5, v5, v21 clamp 995; GFX9-NEXT: v_sub_i32 v6, v6, v22 clamp 996; GFX9-NEXT: v_sub_i32 v7, v7, v23 clamp 997; GFX9-NEXT: v_sub_i32 v8, v8, v24 clamp 998; GFX9-NEXT: v_sub_i32 v9, v9, v25 clamp 999; GFX9-NEXT: v_sub_i32 v10, v10, v26 clamp 1000; GFX9-NEXT: v_sub_i32 v11, v11, v27 clamp 1001; GFX9-NEXT: v_sub_i32 v12, v12, v28 clamp 1002; GFX9-NEXT: v_sub_i32 v13, v13, v29 clamp 1003; GFX9-NEXT: v_sub_i32 v14, v14, v30 clamp 1004; GFX9-NEXT: s_waitcnt vmcnt(0) 1005; GFX9-NEXT: v_sub_i32 v15, v15, v16 clamp 1006; GFX9-NEXT: s_setpc_b64 s[30:31] 1007; 1008; GFX10-LABEL: v_ssubsat_v16i32: 1009; GFX10: ; %bb.0: 1010; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1011; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1012; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32 1013; GFX10-NEXT: v_sub_nc_i32 v0, v0, v16 clamp 1014; GFX10-NEXT: v_sub_nc_i32 v1, v1, v17 clamp 1015; GFX10-NEXT: v_sub_nc_i32 v2, v2, v18 clamp 1016; GFX10-NEXT: v_sub_nc_i32 v3, v3, v19 clamp 1017; GFX10-NEXT: v_sub_nc_i32 v4, v4, v20 clamp 1018; GFX10-NEXT: v_sub_nc_i32 v5, v5, v21 clamp 1019; GFX10-NEXT: v_sub_nc_i32 v6, v6, v22 clamp 1020; GFX10-NEXT: v_sub_nc_i32 v7, v7, v23 clamp 1021; GFX10-NEXT: v_sub_nc_i32 v8, v8, v24 clamp 1022; GFX10-NEXT: v_sub_nc_i32 v9, v9, v25 clamp 1023; GFX10-NEXT: v_sub_nc_i32 v10, v10, v26 clamp 1024; GFX10-NEXT: v_sub_nc_i32 v11, v11, v27 clamp 1025; GFX10-NEXT: v_sub_nc_i32 v12, v12, v28 clamp 1026; GFX10-NEXT: v_sub_nc_i32 v13, v13, v29 clamp 1027; GFX10-NEXT: v_sub_nc_i32 v14, v14, v30 clamp 1028; GFX10-NEXT: s_waitcnt vmcnt(0) 1029; GFX10-NEXT: v_sub_nc_i32 v15, v15, v31 clamp 1030; GFX10-NEXT: s_setpc_b64 s[30:31] 1031 %result = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> 
%lhs, <16 x i32> %rhs)
  ret <16 x i32> %result
}


; Scalar i64 case: calls llvm.ssub.sat.i64 on (%lhs, %rhs) and returns the
; result unchanged.
;
; In the expected output below, every target expands the operation into a
; subtract / subtract-with-borrow pair, two v_cmp_lt_i64 compares whose results
; are xor'ed together, and two v_cndmask_b32 selects. No target uses the single
; "clamp" instruction that the gfx900 and gfx1010 output shows for the
; <16 x i32> test above.
;
; The gfx1010 lines use vcc_lo / s4 / s_xor_b32 where the other three targets
; use vcc / s[4:5] / s_xor_b64.
;
; The assertion lines are autogenerated (see the NOTE at the top of this file);
; regenerate them with utils/update_llc_test_checks.py rather than editing them
; by hand.
define i64 @v_ssubsat_i64(i64 %lhs, i64 %rhs) {
; GFX6-LABEL: v_ssubsat_i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
; GFX6-NEXT: v_subb_u32_e32 v5, vcc, v1, v3, vcc
; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
; GFX6-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[2:3]
; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v5
; GFX6-NEXT: s_xor_b64 vcc, s[4:5], vcc
; GFX6-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX6-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX6-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_ssubsat_i64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v0, v2
; GFX8-NEXT: v_subb_u32_e32 v5, vcc, v1, v3, vcc
; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
; GFX8-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[2:3]
; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v5
; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc
; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX8-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_ssubsat_i64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_sub_co_u32_e32 v4, vcc, v0, v2
; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v1, v3, vcc
; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1]
; GFX9-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[2:3]
; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v5
; GFX9-NEXT: s_xor_b64 vcc, s[4:5], vcc
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc
; GFX9-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_ssubsat_i64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_sub_co_u32 v4, vcc_lo, v0, v2
; GFX10-NEXT: v_sub_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
; GFX10-NEXT: v_cmp_lt_i64_e64 s4, 0, v[2:3]
; GFX10-NEXT: v_ashrrev_i32_e32 v6, 31, v5
; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[4:5], v[0:1]
; GFX10-NEXT: v_xor_b32_e32 v1, 0x80000000, v6
; GFX10-NEXT: s_xor_b32 vcc_lo, s4, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v6, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %result = call i64 @llvm.ssub.sat.i64(i64 %lhs, i64 %rhs)
  ret i64 %result
}

; Declarations of the llvm.ssub.sat overloads, one per operand type. Every
; declaration carries attribute group #0, defined below.
declare i8 @llvm.ssub.sat.i8(i8, i8) #0
declare i16 @llvm.ssub.sat.i16(i16, i16) #0
declare <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16>, <2 x i16>) #0
declare <3 x i16> @llvm.ssub.sat.v3i16(<3 x i16>, <3 x i16>) #0
declare <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16>, <4 x i16>) #0
declare i32 @llvm.ssub.sat.i32(i32, i32) #0
declare <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32>, <2 x i32>) #0
declare <3 x i32> @llvm.ssub.sat.v3i32(<3 x i32>, <3 x i32>) #0
declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>) #0
declare <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32>, <8 x i32>) #0
declare <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32>, <16 x i32>) #0
declare i64 @llvm.ssub.sat.i64(i64, i64) #0

; Attribute group shared by all of the intrinsic declarations above.
attributes #0 = { nounwind readnone speculatable willreturn }