1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck --check-prefix=GFX6 %s 3; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck --check-prefix=GFX8 %s 4; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck --check-prefix=GFX9 %s 5; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX10PLUS,GFX10 %s 6; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX10PLUS,GFX11 %s 7 8define i8 @v_ssubsat_i8(i8 %lhs, i8 %rhs) { 9; GFX6-LABEL: v_ssubsat_i8: 10; GFX6: ; %bb.0: 11; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 8 13; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8 14; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 15; GFX6-NEXT: v_min_i32_e32 v0, 0x7f, v0 16; GFX6-NEXT: v_max_i32_e32 v0, 0xffffff80, v0 17; GFX6-NEXT: s_setpc_b64 s[30:31] 18; 19; GFX8-LABEL: v_ssubsat_i8: 20; GFX8: ; %bb.0: 21; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22; GFX8-NEXT: v_sub_u16_sdwa v0, sext(v0), sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 23; GFX8-NEXT: v_min_i16_e32 v0, 0x7f, v0 24; GFX8-NEXT: v_max_i16_e32 v0, 0xff80, v0 25; GFX8-NEXT: s_setpc_b64 s[30:31] 26; 27; GFX9-LABEL: v_ssubsat_i8: 28; GFX9: ; %bb.0: 29; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 30; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1 31; GFX9-NEXT: v_lshlrev_b16_e32 v0, 8, v0 32; GFX9-NEXT: v_sub_i16 v0, v0, v1 clamp 33; GFX9-NEXT: v_ashrrev_i16_e32 v0, 8, v0 34; GFX9-NEXT: s_setpc_b64 s[30:31] 35; 36; GFX10PLUS-LABEL: v_ssubsat_i8: 37; GFX10PLUS: ; %bb.0: 38; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 39; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 40; GFX10PLUS-NEXT: v_lshlrev_b16 v1, 8, v1 41; GFX10PLUS-NEXT: v_lshlrev_b16 v0, 8, v0 42; GFX10PLUS-NEXT: v_sub_nc_i16 v0, v0, v1 clamp 43; GFX10PLUS-NEXT: v_ashrrev_i16 v0, 
8, v0 44; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 45 %result = call i8 @llvm.ssub.sat.i8(i8 %lhs, i8 %rhs) 46 ret i8 %result 47} 48 49define i16 @v_ssubsat_i16(i16 %lhs, i16 %rhs) { 50; GFX6-LABEL: v_ssubsat_i16: 51; GFX6: ; %bb.0: 52; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 53; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16 54; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16 55; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 56; GFX6-NEXT: v_min_i32_e32 v0, 0x7fff, v0 57; GFX6-NEXT: v_max_i32_e32 v0, 0xffff8000, v0 58; GFX6-NEXT: s_setpc_b64 s[30:31] 59; 60; GFX8-LABEL: v_ssubsat_i16: 61; GFX8: ; %bb.0: 62; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 63; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v1 64; GFX8-NEXT: v_sub_u16_e32 v1, v0, v1 65; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v1, v0 66; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v1 67; GFX8-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0 68; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 69; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 70; GFX8-NEXT: s_setpc_b64 s[30:31] 71; 72; GFX9-LABEL: v_ssubsat_i16: 73; GFX9: ; %bb.0: 74; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 75; GFX9-NEXT: v_sub_i16 v0, v0, v1 clamp 76; GFX9-NEXT: s_setpc_b64 s[30:31] 77; 78; GFX10PLUS-LABEL: v_ssubsat_i16: 79; GFX10PLUS: ; %bb.0: 80; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 81; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 82; GFX10PLUS-NEXT: v_sub_nc_i16 v0, v0, v1 clamp 83; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 84 %result = call i16 @llvm.ssub.sat.i16(i16 %lhs, i16 %rhs) 85 ret i16 %result 86} 87 88define i32 @v_ssubsat_i32(i32 %lhs, i32 %rhs) { 89; GFX6-LABEL: v_ssubsat_i32: 90; GFX6: ; %bb.0: 91; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 92; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v1 93; GFX6-NEXT: v_sub_i32_e64 v1, s[4:5], v0, v1 94; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v1, v0 95; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v1 96; GFX6-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 97; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 98; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, 
vcc 99; GFX6-NEXT: s_setpc_b64 s[30:31] 100; 101; GFX8-LABEL: v_ssubsat_i32: 102; GFX8: ; %bb.0: 103; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 104; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v1 105; GFX8-NEXT: v_sub_u32_e64 v1, s[4:5], v0, v1 106; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v1, v0 107; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v1 108; GFX8-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 109; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 110; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 111; GFX8-NEXT: s_setpc_b64 s[30:31] 112; 113; GFX9-LABEL: v_ssubsat_i32: 114; GFX9: ; %bb.0: 115; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 116; GFX9-NEXT: v_sub_i32 v0, v0, v1 clamp 117; GFX9-NEXT: s_setpc_b64 s[30:31] 118; 119; GFX10PLUS-LABEL: v_ssubsat_i32: 120; GFX10PLUS: ; %bb.0: 121; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 122; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 123; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v1 clamp 124; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 125 %result = call i32 @llvm.ssub.sat.i32(i32 %lhs, i32 %rhs) 126 ret i32 %result 127} 128 129define <2 x i16> @v_ssubsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) { 130; GFX6-LABEL: v_ssubsat_v2i16: 131; GFX6: ; %bb.0: 132; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 133; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16 134; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16 135; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16 136; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16 137; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v3 138; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 139; GFX6-NEXT: v_min_i32_e32 v1, 0x7fff, v1 140; GFX6-NEXT: v_min_i32_e32 v0, 0x7fff, v0 141; GFX6-NEXT: v_max_i32_e32 v1, 0xffff8000, v1 142; GFX6-NEXT: v_max_i32_e32 v0, 0xffff8000, v0 143; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 144; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 145; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 146; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0 147; GFX6-NEXT: s_setpc_b64 s[30:31] 148; 149; GFX8-LABEL: v_ssubsat_v2i16: 150; GFX8: ; %bb.0: 151; GFX8-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 152; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v1 153; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0 154; GFX8-NEXT: v_sub_u16_e32 v4, v3, v2 155; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v4, v3 156; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], 0, v2 157; GFX8-NEXT: v_ashrrev_i16_e32 v2, 15, v4 158; GFX8-NEXT: v_xor_b32_e32 v2, 0xffff8000, v2 159; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc 160; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc 161; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v1 162; GFX8-NEXT: v_sub_u16_e32 v1, v0, v1 163; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v1, v0 164; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v1 165; GFX8-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0 166; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 167; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2 168; GFX8-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 169; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 170; GFX8-NEXT: s_setpc_b64 s[30:31] 171; 172; GFX9-LABEL: v_ssubsat_v2i16: 173; GFX9: ; %bb.0: 174; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 175; GFX9-NEXT: v_pk_sub_i16 v0, v0, v1 clamp 176; GFX9-NEXT: s_setpc_b64 s[30:31] 177; 178; GFX10PLUS-LABEL: v_ssubsat_v2i16: 179; GFX10PLUS: ; %bb.0: 180; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 181; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 182; GFX10PLUS-NEXT: v_pk_sub_i16 v0, v0, v1 clamp 183; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 184 %result = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> %lhs, <2 x i16> %rhs) 185 ret <2 x i16> %result 186} 187 188define <3 x i16> @v_ssubsat_v3i16(<3 x i16> %lhs, <3 x i16> %rhs) { 189; GFX6-LABEL: v_ssubsat_v3i16: 190; GFX6: ; %bb.0: 191; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 192; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16 193; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16 194; GFX6-NEXT: v_bfe_i32 v4, v4, 0, 16 195; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16 196; GFX6-NEXT: v_bfe_i32 v5, v5, 0, 16 197; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16 198; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v4 199; 
GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v3 200; GFX6-NEXT: v_min_i32_e32 v1, 0x7fff, v1 201; GFX6-NEXT: v_min_i32_e32 v0, 0x7fff, v0 202; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v5 203; GFX6-NEXT: v_max_i32_e32 v1, 0xffff8000, v1 204; GFX6-NEXT: v_max_i32_e32 v0, 0xffff8000, v0 205; GFX6-NEXT: v_min_i32_e32 v2, 0x7fff, v2 206; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 207; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 208; GFX6-NEXT: v_max_i32_e32 v3, 0xffff8000, v2 209; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 210; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v3 211; GFX6-NEXT: v_alignbit_b32 v1, v3, v1, 16 212; GFX6-NEXT: s_setpc_b64 s[30:31] 213; 214; GFX8-LABEL: v_ssubsat_v3i16: 215; GFX8: ; %bb.0: 216; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 217; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v2 218; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v0 219; GFX8-NEXT: v_sub_u16_e32 v6, v5, v4 220; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v6, v5 221; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], 0, v4 222; GFX8-NEXT: v_ashrrev_i16_e32 v4, 15, v6 223; GFX8-NEXT: v_xor_b32_e32 v4, 0xffff8000, v4 224; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc 225; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc 226; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v3 227; GFX8-NEXT: v_sub_u16_e32 v3, v1, v3 228; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v3, v1 229; GFX8-NEXT: v_ashrrev_i16_e32 v1, 15, v3 230; GFX8-NEXT: v_xor_b32_e32 v1, 0xffff8000, v1 231; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 232; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc 233; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v2 234; GFX8-NEXT: v_sub_u16_e32 v2, v0, v2 235; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v2, v0 236; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v2 237; GFX8-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0 238; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 239; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 240; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v4 241; GFX8-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 242; GFX8-NEXT: s_setpc_b64 s[30:31] 243; 244; 
GFX9-LABEL: v_ssubsat_v3i16: 245; GFX9: ; %bb.0: 246; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 247; GFX9-NEXT: v_pk_sub_i16 v1, v1, v3 clamp 248; GFX9-NEXT: v_pk_sub_i16 v0, v0, v2 clamp 249; GFX9-NEXT: s_setpc_b64 s[30:31] 250; 251; GFX10PLUS-LABEL: v_ssubsat_v3i16: 252; GFX10PLUS: ; %bb.0: 253; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 254; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 255; GFX10PLUS-NEXT: v_pk_sub_i16 v0, v0, v2 clamp 256; GFX10PLUS-NEXT: v_pk_sub_i16 v1, v1, v3 clamp 257; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 258 %result = call <3 x i16> @llvm.ssub.sat.v3i16(<3 x i16> %lhs, <3 x i16> %rhs) 259 ret <3 x i16> %result 260} 261 262define <2 x float> @v_ssubsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { 263; GFX6-LABEL: v_ssubsat_v4i16: 264; GFX6: ; %bb.0: 265; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 266; GFX6-NEXT: v_bfe_i32 v4, v4, 0, 16 267; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16 268; GFX6-NEXT: v_bfe_i32 v5, v5, 0, 16 269; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16 270; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v1, v5 271; GFX6-NEXT: v_sub_i32_e32 v0, vcc, v0, v4 272; GFX6-NEXT: v_min_i32_e32 v1, 0x7fff, v1 273; GFX6-NEXT: v_min_i32_e32 v0, 0x7fff, v0 274; GFX6-NEXT: v_max_i32_e32 v1, 0xffff8000, v1 275; GFX6-NEXT: v_max_i32_e32 v0, 0xffff8000, v0 276; GFX6-NEXT: v_bfe_i32 v6, v6, 0, 16 277; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16 278; GFX6-NEXT: v_bfe_i32 v7, v7, 0, 16 279; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16 280; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 281; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 282; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 283; GFX6-NEXT: v_sub_i32_e32 v1, vcc, v3, v7 284; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v6 285; GFX6-NEXT: v_min_i32_e32 v1, 0x7fff, v1 286; GFX6-NEXT: v_min_i32_e32 v2, 0x7fff, v2 287; GFX6-NEXT: v_max_i32_e32 v1, 0xffff8000, v1 288; GFX6-NEXT: v_max_i32_e32 v2, 0xffff8000, v2 289; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 290; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2 291; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 292; 
GFX6-NEXT: s_setpc_b64 s[30:31] 293; 294; GFX8-LABEL: v_ssubsat_v4i16: 295; GFX8: ; %bb.0: 296; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 297; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v2 298; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v0 299; GFX8-NEXT: v_sub_u16_e32 v6, v5, v4 300; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v6, v5 301; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], 0, v4 302; GFX8-NEXT: v_ashrrev_i16_e32 v4, 15, v6 303; GFX8-NEXT: v_xor_b32_e32 v4, 0xffff8000, v4 304; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc 305; GFX8-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc 306; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v2 307; GFX8-NEXT: v_sub_u16_e32 v2, v0, v2 308; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v2, v0 309; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v2 310; GFX8-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0 311; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 312; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v4 313; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 314; GFX8-NEXT: v_or_b32_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 315; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v3 316; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v1 317; GFX8-NEXT: v_sub_u16_e32 v5, v4, v2 318; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, v5, v4 319; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], 0, v2 320; GFX8-NEXT: v_ashrrev_i16_e32 v2, 15, v5 321; GFX8-NEXT: v_xor_b32_e32 v2, 0xffff8000, v2 322; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc 323; GFX8-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc 324; GFX8-NEXT: v_cmp_lt_i16_e32 vcc, 0, v3 325; GFX8-NEXT: v_sub_u16_e32 v3, v1, v3 326; GFX8-NEXT: v_cmp_lt_i16_e64 s[4:5], v3, v1 327; GFX8-NEXT: v_ashrrev_i16_e32 v1, 15, v3 328; GFX8-NEXT: v_xor_b32_e32 v1, 0xffff8000, v1 329; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 330; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2 331; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc 332; GFX8-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 333; GFX8-NEXT: s_setpc_b64 s[30:31] 334; 335; GFX9-LABEL: v_ssubsat_v4i16: 336; GFX9: 
; %bb.0: 337; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 338; GFX9-NEXT: v_pk_sub_i16 v0, v0, v2 clamp 339; GFX9-NEXT: v_pk_sub_i16 v1, v1, v3 clamp 340; GFX9-NEXT: s_setpc_b64 s[30:31] 341; 342; GFX10PLUS-LABEL: v_ssubsat_v4i16: 343; GFX10PLUS: ; %bb.0: 344; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 345; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 346; GFX10PLUS-NEXT: v_pk_sub_i16 v0, v0, v2 clamp 347; GFX10PLUS-NEXT: v_pk_sub_i16 v1, v1, v3 clamp 348; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 349 %result = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) 350 %cast = bitcast <4 x i16> %result to <2 x float> 351 ret <2 x float> %cast 352} 353 354define <2 x i32> @v_ssubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { 355; GFX6-LABEL: v_ssubsat_v2i32: 356; GFX6: ; %bb.0: 357; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 358; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v2 359; GFX6-NEXT: v_sub_i32_e64 v2, s[4:5], v0, v2 360; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v0 361; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v2 362; GFX6-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 363; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 364; GFX6-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc 365; GFX6-NEXT: v_sub_i32_e64 v2, s[4:5], v1, v3 366; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v3 367; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v1 368; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v2 369; GFX6-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 370; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 371; GFX6-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc 372; GFX6-NEXT: s_setpc_b64 s[30:31] 373; 374; GFX8-LABEL: v_ssubsat_v2i32: 375; GFX8: ; %bb.0: 376; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 377; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v2 378; GFX8-NEXT: v_sub_u32_e64 v2, s[4:5], v0, v2 379; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v0 380; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v2 381; GFX8-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 382; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 383; GFX8-NEXT: v_cndmask_b32_e32 v0, v2, v0, 
vcc 384; GFX8-NEXT: v_sub_u32_e64 v2, s[4:5], v1, v3 385; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v3 386; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v2, v1 387; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v2 388; GFX8-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 389; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 390; GFX8-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc 391; GFX8-NEXT: s_setpc_b64 s[30:31] 392; 393; GFX9-LABEL: v_ssubsat_v2i32: 394; GFX9: ; %bb.0: 395; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 396; GFX9-NEXT: v_sub_i32 v0, v0, v2 clamp 397; GFX9-NEXT: v_sub_i32 v1, v1, v3 clamp 398; GFX9-NEXT: s_setpc_b64 s[30:31] 399; 400; GFX10PLUS-LABEL: v_ssubsat_v2i32: 401; GFX10PLUS: ; %bb.0: 402; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 403; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 404; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v2 clamp 405; GFX10PLUS-NEXT: v_sub_nc_i32 v1, v1, v3 clamp 406; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 407 %result = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) 408 ret <2 x i32> %result 409} 410 411define <3 x i32> @v_ssubsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) { 412; GFX6-LABEL: v_ssubsat_v3i32: 413; GFX6: ; %bb.0: 414; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 415; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v3 416; GFX6-NEXT: v_sub_i32_e64 v3, s[4:5], v0, v3 417; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v0 418; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v3 419; GFX6-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 420; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 421; GFX6-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc 422; GFX6-NEXT: v_sub_i32_e64 v3, s[4:5], v1, v4 423; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v4 424; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v1 425; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v3 426; GFX6-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 427; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 428; GFX6-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc 429; GFX6-NEXT: v_sub_i32_e64 v3, s[4:5], v2, v5 430; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v5 431; GFX6-NEXT: 
v_cmp_lt_i32_e64 s[4:5], v3, v2 432; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v3 433; GFX6-NEXT: v_xor_b32_e32 v2, 0x80000000, v2 434; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 435; GFX6-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc 436; GFX6-NEXT: s_setpc_b64 s[30:31] 437; 438; GFX8-LABEL: v_ssubsat_v3i32: 439; GFX8: ; %bb.0: 440; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 441; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v3 442; GFX8-NEXT: v_sub_u32_e64 v3, s[4:5], v0, v3 443; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v0 444; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v3 445; GFX8-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 446; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 447; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v0, vcc 448; GFX8-NEXT: v_sub_u32_e64 v3, s[4:5], v1, v4 449; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v4 450; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v1 451; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v3 452; GFX8-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 453; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 454; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc 455; GFX8-NEXT: v_sub_u32_e64 v3, s[4:5], v2, v5 456; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v5 457; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v2 458; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v3 459; GFX8-NEXT: v_xor_b32_e32 v2, 0x80000000, v2 460; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 461; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc 462; GFX8-NEXT: s_setpc_b64 s[30:31] 463; 464; GFX9-LABEL: v_ssubsat_v3i32: 465; GFX9: ; %bb.0: 466; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 467; GFX9-NEXT: v_sub_i32 v0, v0, v3 clamp 468; GFX9-NEXT: v_sub_i32 v1, v1, v4 clamp 469; GFX9-NEXT: v_sub_i32 v2, v2, v5 clamp 470; GFX9-NEXT: s_setpc_b64 s[30:31] 471; 472; GFX10PLUS-LABEL: v_ssubsat_v3i32: 473; GFX10PLUS: ; %bb.0: 474; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 475; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 476; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v3 clamp 477; GFX10PLUS-NEXT: v_sub_nc_i32 v1, v1, v4 clamp 478; GFX10PLUS-NEXT: v_sub_nc_i32 v2, v2, v5 
clamp 479; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 480 %result = call <3 x i32> @llvm.ssub.sat.v3i32(<3 x i32> %lhs, <3 x i32> %rhs) 481 ret <3 x i32> %result 482} 483 484define <4 x i32> @v_ssubsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { 485; GFX6-LABEL: v_ssubsat_v4i32: 486; GFX6: ; %bb.0: 487; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 488; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v4 489; GFX6-NEXT: v_sub_i32_e64 v4, s[4:5], v0, v4 490; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v0 491; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v4 492; GFX6-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 493; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 494; GFX6-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc 495; GFX6-NEXT: v_sub_i32_e64 v4, s[4:5], v1, v5 496; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v5 497; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v1 498; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v4 499; GFX6-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 500; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 501; GFX6-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc 502; GFX6-NEXT: v_sub_i32_e64 v4, s[4:5], v2, v6 503; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v6 504; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v2 505; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v4 506; GFX6-NEXT: v_xor_b32_e32 v2, 0x80000000, v2 507; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 508; GFX6-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc 509; GFX6-NEXT: v_sub_i32_e64 v4, s[4:5], v3, v7 510; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v7 511; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v3 512; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v4 513; GFX6-NEXT: v_xor_b32_e32 v3, 0x80000000, v3 514; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 515; GFX6-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc 516; GFX6-NEXT: s_setpc_b64 s[30:31] 517; 518; GFX8-LABEL: v_ssubsat_v4i32: 519; GFX8: ; %bb.0: 520; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 521; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v4 522; GFX8-NEXT: v_sub_u32_e64 v4, s[4:5], v0, v4 523; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v0 524; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v4 
525; GFX8-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 526; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 527; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc 528; GFX8-NEXT: v_sub_u32_e64 v4, s[4:5], v1, v5 529; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v5 530; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v1 531; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v4 532; GFX8-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 533; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 534; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc 535; GFX8-NEXT: v_sub_u32_e64 v4, s[4:5], v2, v6 536; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v6 537; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v2 538; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v4 539; GFX8-NEXT: v_xor_b32_e32 v2, 0x80000000, v2 540; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 541; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc 542; GFX8-NEXT: v_sub_u32_e64 v4, s[4:5], v3, v7 543; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v7 544; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v4, v3 545; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v4 546; GFX8-NEXT: v_xor_b32_e32 v3, 0x80000000, v3 547; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 548; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc 549; GFX8-NEXT: s_setpc_b64 s[30:31] 550; 551; GFX9-LABEL: v_ssubsat_v4i32: 552; GFX9: ; %bb.0: 553; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 554; GFX9-NEXT: v_sub_i32 v0, v0, v4 clamp 555; GFX9-NEXT: v_sub_i32 v1, v1, v5 clamp 556; GFX9-NEXT: v_sub_i32 v2, v2, v6 clamp 557; GFX9-NEXT: v_sub_i32 v3, v3, v7 clamp 558; GFX9-NEXT: s_setpc_b64 s[30:31] 559; 560; GFX10PLUS-LABEL: v_ssubsat_v4i32: 561; GFX10PLUS: ; %bb.0: 562; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 563; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 564; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v4 clamp 565; GFX10PLUS-NEXT: v_sub_nc_i32 v1, v1, v5 clamp 566; GFX10PLUS-NEXT: v_sub_nc_i32 v2, v2, v6 clamp 567; GFX10PLUS-NEXT: v_sub_nc_i32 v3, v3, v7 clamp 568; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 569 %result = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) 570 ret 
<4 x i32> %result 571} 572 573define <8 x i32> @v_ssubsat_v8i32(<8 x i32> %lhs, <8 x i32> %rhs) { 574; GFX6-LABEL: v_ssubsat_v8i32: 575; GFX6: ; %bb.0: 576; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 577; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v8 578; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v0, v8 579; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v0 580; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v8 581; GFX6-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 582; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 583; GFX6-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc 584; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v1, v9 585; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v9 586; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v1 587; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v8 588; GFX6-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 589; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 590; GFX6-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc 591; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v2, v10 592; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v10 593; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v2 594; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v8 595; GFX6-NEXT: v_xor_b32_e32 v2, 0x80000000, v2 596; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 597; GFX6-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc 598; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v3, v11 599; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v11 600; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v3 601; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v8 602; GFX6-NEXT: v_xor_b32_e32 v3, 0x80000000, v3 603; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 604; GFX6-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc 605; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v4, v12 606; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v12 607; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v4 608; GFX6-NEXT: v_ashrrev_i32_e32 v4, 31, v8 609; GFX6-NEXT: v_xor_b32_e32 v4, 0x80000000, v4 610; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 611; GFX6-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc 612; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v5, v13 613; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v13 614; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v5 
615; GFX6-NEXT: v_ashrrev_i32_e32 v5, 31, v8 616; GFX6-NEXT: v_xor_b32_e32 v5, 0x80000000, v5 617; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 618; GFX6-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc 619; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v6, v14 620; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v14 621; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v6 622; GFX6-NEXT: v_ashrrev_i32_e32 v6, 31, v8 623; GFX6-NEXT: v_xor_b32_e32 v6, 0x80000000, v6 624; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 625; GFX6-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc 626; GFX6-NEXT: v_sub_i32_e64 v8, s[4:5], v7, v15 627; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v15 628; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v7 629; GFX6-NEXT: v_ashrrev_i32_e32 v7, 31, v8 630; GFX6-NEXT: v_xor_b32_e32 v7, 0x80000000, v7 631; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 632; GFX6-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc 633; GFX6-NEXT: s_setpc_b64 s[30:31] 634; 635; GFX8-LABEL: v_ssubsat_v8i32: 636; GFX8: ; %bb.0: 637; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 638; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v8 639; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v0, v8 640; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v0 641; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v8 642; GFX8-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 643; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 644; GFX8-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc 645; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v1, v9 646; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v9 647; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v1 648; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v8 649; GFX8-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 650; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 651; GFX8-NEXT: v_cndmask_b32_e32 v1, v8, v1, vcc 652; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v2, v10 653; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v10 654; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v2 655; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v8 656; GFX8-NEXT: v_xor_b32_e32 v2, 0x80000000, v2 657; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 658; GFX8-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc 659; 
GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v3, v11 660; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v11 661; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v3 662; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v8 663; GFX8-NEXT: v_xor_b32_e32 v3, 0x80000000, v3 664; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 665; GFX8-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc 666; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v4, v12 667; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v12 668; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v4 669; GFX8-NEXT: v_ashrrev_i32_e32 v4, 31, v8 670; GFX8-NEXT: v_xor_b32_e32 v4, 0x80000000, v4 671; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 672; GFX8-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc 673; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v5, v13 674; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v13 675; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v5 676; GFX8-NEXT: v_ashrrev_i32_e32 v5, 31, v8 677; GFX8-NEXT: v_xor_b32_e32 v5, 0x80000000, v5 678; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 679; GFX8-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc 680; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v6, v14 681; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v14 682; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v6 683; GFX8-NEXT: v_ashrrev_i32_e32 v6, 31, v8 684; GFX8-NEXT: v_xor_b32_e32 v6, 0x80000000, v6 685; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 686; GFX8-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc 687; GFX8-NEXT: v_sub_u32_e64 v8, s[4:5], v7, v15 688; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v15 689; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v8, v7 690; GFX8-NEXT: v_ashrrev_i32_e32 v7, 31, v8 691; GFX8-NEXT: v_xor_b32_e32 v7, 0x80000000, v7 692; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 693; GFX8-NEXT: v_cndmask_b32_e32 v7, v8, v7, vcc 694; GFX8-NEXT: s_setpc_b64 s[30:31] 695; 696; GFX9-LABEL: v_ssubsat_v8i32: 697; GFX9: ; %bb.0: 698; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 699; GFX9-NEXT: v_sub_i32 v0, v0, v8 clamp 700; GFX9-NEXT: v_sub_i32 v1, v1, v9 clamp 701; GFX9-NEXT: v_sub_i32 v2, v2, v10 clamp 702; GFX9-NEXT: v_sub_i32 v3, v3, v11 clamp 703; GFX9-NEXT: v_sub_i32 
v4, v4, v12 clamp 704; GFX9-NEXT: v_sub_i32 v5, v5, v13 clamp 705; GFX9-NEXT: v_sub_i32 v6, v6, v14 clamp 706; GFX9-NEXT: v_sub_i32 v7, v7, v15 clamp 707; GFX9-NEXT: s_setpc_b64 s[30:31] 708; 709; GFX10PLUS-LABEL: v_ssubsat_v8i32: 710; GFX10PLUS: ; %bb.0: 711; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 712; GFX10PLUS-NEXT: s_waitcnt_vscnt null, 0x0 713; GFX10PLUS-NEXT: v_sub_nc_i32 v0, v0, v8 clamp 714; GFX10PLUS-NEXT: v_sub_nc_i32 v1, v1, v9 clamp 715; GFX10PLUS-NEXT: v_sub_nc_i32 v2, v2, v10 clamp 716; GFX10PLUS-NEXT: v_sub_nc_i32 v3, v3, v11 clamp 717; GFX10PLUS-NEXT: v_sub_nc_i32 v4, v4, v12 clamp 718; GFX10PLUS-NEXT: v_sub_nc_i32 v5, v5, v13 clamp 719; GFX10PLUS-NEXT: v_sub_nc_i32 v6, v6, v14 clamp 720; GFX10PLUS-NEXT: v_sub_nc_i32 v7, v7, v15 clamp 721; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 722 %result = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> %lhs, <8 x i32> %rhs) 723 ret <8 x i32> %result 724} 725 ; Signed saturating subtract on <16 x i32>. In the expected output for every target the value subtracted from v15 is loaded via s32 (buffer_load_dword, or scratch_load_b32 on gfx1100) and is consumed only after s_waitcnt vmcnt(0). The tahiti and fiji output expands each lane into a subtract, two signed compares, an xor of the two conditions and a v_cndmask select; the gfx900, gfx1010 and gfx1100 output uses one clamped subtract per lane. 726define <16 x i32> @v_ssubsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) { 727; GFX6-LABEL: v_ssubsat_v16i32: 728; GFX6: ; %bb.0: 729; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 730; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v16 731; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v0, v16 732; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v0 733; GFX6-NEXT: v_ashrrev_i32_e32 v0, 31, v16 734; GFX6-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 735; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 736; GFX6-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc 737; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v1, v17 738; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v17 739; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v1 740; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v16 741; GFX6-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 742; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 743; GFX6-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc 744; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v2, v18 745; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v18 746; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v2 747; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v16 748; GFX6-NEXT: 
v_xor_b32_e32 v2, 0x80000000, v2 749; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 750; GFX6-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc 751; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v3, v19 752; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v19 753; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v3 754; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v16 755; GFX6-NEXT: v_xor_b32_e32 v3, 0x80000000, v3 756; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 757; GFX6-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc 758; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v4, v20 759; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v20 760; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v4 761; GFX6-NEXT: v_ashrrev_i32_e32 v4, 31, v16 762; GFX6-NEXT: v_xor_b32_e32 v4, 0x80000000, v4 763; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 764; GFX6-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc 765; GFX6-NEXT: buffer_load_dword v16, off, s[0:3], s32 766; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v5, v21 767; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v21 768; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v5 769; GFX6-NEXT: v_ashrrev_i32_e32 v5, 31, v17 770; GFX6-NEXT: v_xor_b32_e32 v5, 0x80000000, v5 771; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 772; GFX6-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc 773; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v6, v22 774; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v22 775; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v6 776; GFX6-NEXT: v_ashrrev_i32_e32 v6, 31, v17 777; GFX6-NEXT: v_xor_b32_e32 v6, 0x80000000, v6 778; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 779; GFX6-NEXT: v_cndmask_b32_e32 v6, v17, v6, vcc 780; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v7, v23 781; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v23 782; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v7 783; GFX6-NEXT: v_ashrrev_i32_e32 v7, 31, v17 784; GFX6-NEXT: v_xor_b32_e32 v7, 0x80000000, v7 785; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 786; GFX6-NEXT: v_cndmask_b32_e32 v7, v17, v7, vcc 787; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v8, v24 788; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v24 789; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v8 
790; GFX6-NEXT: v_ashrrev_i32_e32 v8, 31, v17 791; GFX6-NEXT: v_xor_b32_e32 v8, 0x80000000, v8 792; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 793; GFX6-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc 794; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v9, v25 795; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v25 796; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v9 797; GFX6-NEXT: v_ashrrev_i32_e32 v9, 31, v17 798; GFX6-NEXT: v_xor_b32_e32 v9, 0x80000000, v9 799; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 800; GFX6-NEXT: v_cndmask_b32_e32 v9, v17, v9, vcc 801; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v10, v26 802; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v26 803; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v10 804; GFX6-NEXT: v_ashrrev_i32_e32 v10, 31, v17 805; GFX6-NEXT: v_xor_b32_e32 v10, 0x80000000, v10 806; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 807; GFX6-NEXT: v_cndmask_b32_e32 v10, v17, v10, vcc 808; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v11, v27 809; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v27 810; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v11 811; GFX6-NEXT: v_ashrrev_i32_e32 v11, 31, v17 812; GFX6-NEXT: v_xor_b32_e32 v11, 0x80000000, v11 813; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 814; GFX6-NEXT: v_cndmask_b32_e32 v11, v17, v11, vcc 815; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v12, v28 816; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v28 817; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v12 818; GFX6-NEXT: v_ashrrev_i32_e32 v12, 31, v17 819; GFX6-NEXT: v_xor_b32_e32 v12, 0x80000000, v12 820; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 821; GFX6-NEXT: v_cndmask_b32_e32 v12, v17, v12, vcc 822; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v13, v29 823; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v29 824; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v13 825; GFX6-NEXT: v_ashrrev_i32_e32 v13, 31, v17 826; GFX6-NEXT: v_xor_b32_e32 v13, 0x80000000, v13 827; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 828; GFX6-NEXT: v_cndmask_b32_e32 v13, v17, v13, vcc 829; GFX6-NEXT: v_sub_i32_e64 v17, s[4:5], v14, v30 830; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v30 831; GFX6-NEXT: 
v_cmp_lt_i32_e64 s[4:5], v17, v14 832; GFX6-NEXT: v_ashrrev_i32_e32 v14, 31, v17 833; GFX6-NEXT: v_xor_b32_e32 v14, 0x80000000, v14 834; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 835; GFX6-NEXT: v_cndmask_b32_e32 v14, v17, v14, vcc 836; GFX6-NEXT: s_waitcnt vmcnt(0) 837; GFX6-NEXT: v_cmp_lt_i32_e32 vcc, 0, v16 838; GFX6-NEXT: v_sub_i32_e64 v16, s[4:5], v15, v16 839; GFX6-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v15 840; GFX6-NEXT: v_ashrrev_i32_e32 v15, 31, v16 841; GFX6-NEXT: v_xor_b32_e32 v15, 0x80000000, v15 842; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 843; GFX6-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc 844; GFX6-NEXT: s_setpc_b64 s[30:31] 845; 846; GFX8-LABEL: v_ssubsat_v16i32: 847; GFX8: ; %bb.0: 848; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 849; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v16 850; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v0, v16 851; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v0 852; GFX8-NEXT: v_ashrrev_i32_e32 v0, 31, v16 853; GFX8-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 854; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 855; GFX8-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc 856; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v1, v17 857; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v17 858; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v1 859; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v16 860; GFX8-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 861; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 862; GFX8-NEXT: v_cndmask_b32_e32 v1, v16, v1, vcc 863; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v2, v18 864; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v18 865; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v2 866; GFX8-NEXT: v_ashrrev_i32_e32 v2, 31, v16 867; GFX8-NEXT: v_xor_b32_e32 v2, 0x80000000, v2 868; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 869; GFX8-NEXT: v_cndmask_b32_e32 v2, v16, v2, vcc 870; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v3, v19 871; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v19 872; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v3 873; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v16 874; GFX8-NEXT: v_xor_b32_e32 v3, 0x80000000, v3 
875; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 876; GFX8-NEXT: v_cndmask_b32_e32 v3, v16, v3, vcc 877; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v4, v20 878; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v20 879; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v4 880; GFX8-NEXT: v_ashrrev_i32_e32 v4, 31, v16 881; GFX8-NEXT: v_xor_b32_e32 v4, 0x80000000, v4 882; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 883; GFX8-NEXT: v_cndmask_b32_e32 v4, v16, v4, vcc 884; GFX8-NEXT: buffer_load_dword v16, off, s[0:3], s32 885; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v5, v21 886; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v21 887; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v5 888; GFX8-NEXT: v_ashrrev_i32_e32 v5, 31, v17 889; GFX8-NEXT: v_xor_b32_e32 v5, 0x80000000, v5 890; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 891; GFX8-NEXT: v_cndmask_b32_e32 v5, v17, v5, vcc 892; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v6, v22 893; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v22 894; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v6 895; GFX8-NEXT: v_ashrrev_i32_e32 v6, 31, v17 896; GFX8-NEXT: v_xor_b32_e32 v6, 0x80000000, v6 897; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 898; GFX8-NEXT: v_cndmask_b32_e32 v6, v17, v6, vcc 899; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v7, v23 900; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v23 901; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v7 902; GFX8-NEXT: v_ashrrev_i32_e32 v7, 31, v17 903; GFX8-NEXT: v_xor_b32_e32 v7, 0x80000000, v7 904; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 905; GFX8-NEXT: v_cndmask_b32_e32 v7, v17, v7, vcc 906; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v8, v24 907; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v24 908; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v8 909; GFX8-NEXT: v_ashrrev_i32_e32 v8, 31, v17 910; GFX8-NEXT: v_xor_b32_e32 v8, 0x80000000, v8 911; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 912; GFX8-NEXT: v_cndmask_b32_e32 v8, v17, v8, vcc 913; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v9, v25 914; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v25 915; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v9 916; GFX8-NEXT: v_ashrrev_i32_e32 
v9, 31, v17 917; GFX8-NEXT: v_xor_b32_e32 v9, 0x80000000, v9 918; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 919; GFX8-NEXT: v_cndmask_b32_e32 v9, v17, v9, vcc 920; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v10, v26 921; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v26 922; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v10 923; GFX8-NEXT: v_ashrrev_i32_e32 v10, 31, v17 924; GFX8-NEXT: v_xor_b32_e32 v10, 0x80000000, v10 925; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 926; GFX8-NEXT: v_cndmask_b32_e32 v10, v17, v10, vcc 927; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v11, v27 928; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v27 929; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v11 930; GFX8-NEXT: v_ashrrev_i32_e32 v11, 31, v17 931; GFX8-NEXT: v_xor_b32_e32 v11, 0x80000000, v11 932; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 933; GFX8-NEXT: v_cndmask_b32_e32 v11, v17, v11, vcc 934; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v12, v28 935; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v28 936; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v12 937; GFX8-NEXT: v_ashrrev_i32_e32 v12, 31, v17 938; GFX8-NEXT: v_xor_b32_e32 v12, 0x80000000, v12 939; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 940; GFX8-NEXT: v_cndmask_b32_e32 v12, v17, v12, vcc 941; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v13, v29 942; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v29 943; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v13 944; GFX8-NEXT: v_ashrrev_i32_e32 v13, 31, v17 945; GFX8-NEXT: v_xor_b32_e32 v13, 0x80000000, v13 946; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 947; GFX8-NEXT: v_cndmask_b32_e32 v13, v17, v13, vcc 948; GFX8-NEXT: v_sub_u32_e64 v17, s[4:5], v14, v30 949; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v30 950; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v17, v14 951; GFX8-NEXT: v_ashrrev_i32_e32 v14, 31, v17 952; GFX8-NEXT: v_xor_b32_e32 v14, 0x80000000, v14 953; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 954; GFX8-NEXT: v_cndmask_b32_e32 v14, v17, v14, vcc 955; GFX8-NEXT: s_waitcnt vmcnt(0) 956; GFX8-NEXT: v_cmp_lt_i32_e32 vcc, 0, v16 957; GFX8-NEXT: v_sub_u32_e64 v16, s[4:5], v15, v16 958; 
GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v16, v15 959; GFX8-NEXT: v_ashrrev_i32_e32 v15, 31, v16 960; GFX8-NEXT: v_xor_b32_e32 v15, 0x80000000, v15 961; GFX8-NEXT: s_xor_b64 vcc, vcc, s[4:5] 962; GFX8-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc 963; GFX8-NEXT: s_setpc_b64 s[30:31] 964; 965; GFX9-LABEL: v_ssubsat_v16i32: 966; GFX9: ; %bb.0: 967; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 968; GFX9-NEXT: v_sub_i32 v0, v0, v16 clamp 969; GFX9-NEXT: buffer_load_dword v16, off, s[0:3], s32 970; GFX9-NEXT: v_sub_i32 v1, v1, v17 clamp 971; GFX9-NEXT: v_sub_i32 v2, v2, v18 clamp 972; GFX9-NEXT: v_sub_i32 v3, v3, v19 clamp 973; GFX9-NEXT: v_sub_i32 v4, v4, v20 clamp 974; GFX9-NEXT: v_sub_i32 v5, v5, v21 clamp 975; GFX9-NEXT: v_sub_i32 v6, v6, v22 clamp 976; GFX9-NEXT: v_sub_i32 v7, v7, v23 clamp 977; GFX9-NEXT: v_sub_i32 v8, v8, v24 clamp 978; GFX9-NEXT: v_sub_i32 v9, v9, v25 clamp 979; GFX9-NEXT: v_sub_i32 v10, v10, v26 clamp 980; GFX9-NEXT: v_sub_i32 v11, v11, v27 clamp 981; GFX9-NEXT: v_sub_i32 v12, v12, v28 clamp 982; GFX9-NEXT: v_sub_i32 v13, v13, v29 clamp 983; GFX9-NEXT: v_sub_i32 v14, v14, v30 clamp 984; GFX9-NEXT: s_waitcnt vmcnt(0) 985; GFX9-NEXT: v_sub_i32 v15, v15, v16 clamp 986; GFX9-NEXT: s_setpc_b64 s[30:31] 987; 988; GFX10-LABEL: v_ssubsat_v16i32: 989; GFX10: ; %bb.0: 990; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 991; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 992; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32 993; GFX10-NEXT: v_sub_nc_i32 v0, v0, v16 clamp 994; GFX10-NEXT: v_sub_nc_i32 v1, v1, v17 clamp 995; GFX10-NEXT: v_sub_nc_i32 v2, v2, v18 clamp 996; GFX10-NEXT: v_sub_nc_i32 v3, v3, v19 clamp 997; GFX10-NEXT: v_sub_nc_i32 v4, v4, v20 clamp 998; GFX10-NEXT: v_sub_nc_i32 v5, v5, v21 clamp 999; GFX10-NEXT: v_sub_nc_i32 v6, v6, v22 clamp 1000; GFX10-NEXT: v_sub_nc_i32 v7, v7, v23 clamp 1001; GFX10-NEXT: v_sub_nc_i32 v8, v8, v24 clamp 1002; GFX10-NEXT: v_sub_nc_i32 v9, v9, v25 clamp 1003; GFX10-NEXT: v_sub_nc_i32 v10, v10, v26 clamp 1004; 
GFX10-NEXT: v_sub_nc_i32 v11, v11, v27 clamp 1005; GFX10-NEXT: v_sub_nc_i32 v12, v12, v28 clamp 1006; GFX10-NEXT: v_sub_nc_i32 v13, v13, v29 clamp 1007; GFX10-NEXT: v_sub_nc_i32 v14, v14, v30 clamp 1008; GFX10-NEXT: s_waitcnt vmcnt(0) 1009; GFX10-NEXT: v_sub_nc_i32 v15, v15, v31 clamp 1010; GFX10-NEXT: s_setpc_b64 s[30:31] 1011; 1012; GFX11-LABEL: v_ssubsat_v16i32: 1013; GFX11: ; %bb.0: 1014; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1015; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1016; GFX11-NEXT: scratch_load_b32 v31, off, s32 1017; GFX11-NEXT: v_sub_nc_i32 v0, v0, v16 clamp 1018; GFX11-NEXT: v_sub_nc_i32 v1, v1, v17 clamp 1019; GFX11-NEXT: v_sub_nc_i32 v2, v2, v18 clamp 1020; GFX11-NEXT: v_sub_nc_i32 v3, v3, v19 clamp 1021; GFX11-NEXT: v_sub_nc_i32 v4, v4, v20 clamp 1022; GFX11-NEXT: v_sub_nc_i32 v5, v5, v21 clamp 1023; GFX11-NEXT: v_sub_nc_i32 v6, v6, v22 clamp 1024; GFX11-NEXT: v_sub_nc_i32 v7, v7, v23 clamp 1025; GFX11-NEXT: v_sub_nc_i32 v8, v8, v24 clamp 1026; GFX11-NEXT: v_sub_nc_i32 v9, v9, v25 clamp 1027; GFX11-NEXT: v_sub_nc_i32 v10, v10, v26 clamp 1028; GFX11-NEXT: v_sub_nc_i32 v11, v11, v27 clamp 1029; GFX11-NEXT: v_sub_nc_i32 v12, v12, v28 clamp 1030; GFX11-NEXT: v_sub_nc_i32 v13, v13, v29 clamp 1031; GFX11-NEXT: v_sub_nc_i32 v14, v14, v30 clamp 1032; GFX11-NEXT: s_waitcnt vmcnt(0) 1033; GFX11-NEXT: v_sub_nc_i32 v15, v15, v31 clamp 1034; GFX11-NEXT: s_setpc_b64 s[30:31] 1035 %result = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> %lhs, <16 x i32> %rhs) 1036 ret <16 x i32> %result 1037} 1038 1039 ; Signed saturating subtract on i64. The expected output for every target is a borrow-chained 64-bit subtract, two v_cmp_lt_i64 compares whose results are xor-ed, and v_cndmask selects (a single v_dual_cndmask_b32 on gfx1100) between the raw difference and a value built from its sign: v_ashrrev_i32 by 31 for the low half, that value xor 0x80000000 for the high half. 1040define i64 @v_ssubsat_i64(i64 %lhs, i64 %rhs) { 1041; GFX6-LABEL: v_ssubsat_i64: 1042; GFX6: ; %bb.0: 1043; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1044; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v0, v2 1045; GFX6-NEXT: v_subb_u32_e32 v5, vcc, v1, v3, vcc 1046; GFX6-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1] 1047; GFX6-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[2:3] 1048; GFX6-NEXT: v_ashrrev_i32_e32 v1, 31, v5 1049; GFX6-NEXT: s_xor_b64 vcc, s[4:5], 
vcc 1050; GFX6-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc 1051; GFX6-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 1052; GFX6-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc 1053; GFX6-NEXT: s_setpc_b64 s[30:31] 1054; 1055; GFX8-LABEL: v_ssubsat_i64: 1056; GFX8: ; %bb.0: 1057; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1058; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v0, v2 1059; GFX8-NEXT: v_subb_u32_e32 v5, vcc, v1, v3, vcc 1060; GFX8-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1] 1061; GFX8-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[2:3] 1062; GFX8-NEXT: v_ashrrev_i32_e32 v1, 31, v5 1063; GFX8-NEXT: s_xor_b64 vcc, s[4:5], vcc 1064; GFX8-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc 1065; GFX8-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 1066; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc 1067; GFX8-NEXT: s_setpc_b64 s[30:31] 1068; 1069; GFX9-LABEL: v_ssubsat_i64: 1070; GFX9: ; %bb.0: 1071; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1072; GFX9-NEXT: v_sub_co_u32_e32 v4, vcc, v0, v2 1073; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v1, v3, vcc 1074; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, v[4:5], v[0:1] 1075; GFX9-NEXT: v_cmp_lt_i64_e64 s[4:5], 0, v[2:3] 1076; GFX9-NEXT: v_ashrrev_i32_e32 v1, 31, v5 1077; GFX9-NEXT: s_xor_b64 vcc, s[4:5], vcc 1078; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v1, vcc 1079; GFX9-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 1080; GFX9-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc 1081; GFX9-NEXT: s_setpc_b64 s[30:31] 1082; 1083; GFX10-LABEL: v_ssubsat_i64: 1084; GFX10: ; %bb.0: 1085; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1086; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1087; GFX10-NEXT: v_sub_co_u32 v4, vcc_lo, v0, v2 1088; GFX10-NEXT: v_sub_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo 1089; GFX10-NEXT: v_cmp_lt_i64_e64 s4, 0, v[2:3] 1090; GFX10-NEXT: v_ashrrev_i32_e32 v6, 31, v5 1091; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[4:5], v[0:1] 1092; GFX10-NEXT: v_xor_b32_e32 v1, 0x80000000, v6 1093; GFX10-NEXT: s_xor_b32 vcc_lo, s4, vcc_lo 1094; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v6, vcc_lo 1095; 
GFX10-NEXT: v_cndmask_b32_e32 v1, v5, v1, vcc_lo 1096; GFX10-NEXT: s_setpc_b64 s[30:31] 1097; 1098; GFX11-LABEL: v_ssubsat_i64: 1099; GFX11: ; %bb.0: 1100; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1101; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 1102; GFX11-NEXT: v_sub_co_u32 v4, vcc_lo, v0, v2 1103; GFX11-NEXT: v_sub_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo 1104; GFX11-NEXT: v_cmp_lt_i64_e64 s0, 0, v[2:3] 1105; GFX11-NEXT: v_ashrrev_i32_e32 v6, 31, v5 1106; GFX11-NEXT: v_cmp_lt_i64_e32 vcc_lo, v[4:5], v[0:1] 1107; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v6 1108; GFX11-NEXT: s_xor_b32 vcc_lo, s0, vcc_lo 1109; GFX11-NEXT: v_dual_cndmask_b32 v0, v4, v6 :: v_dual_cndmask_b32 v1, v5, v1 1110; GFX11-NEXT: s_setpc_b64 s[30:31] 1111 %result = call i64 @llvm.ssub.sat.i64(i64 %lhs, i64 %rhs) 1112 ret i64 %result 1113} 1114 ; Declarations of the llvm.ssub.sat overloads (i8, i16, 2/3/4-element i16 vectors, i32, 2/3/4/8/16-element i32 vectors, i64); each one references attribute group #0. 1115declare i8 @llvm.ssub.sat.i8(i8, i8) #0 1116declare i16 @llvm.ssub.sat.i16(i16, i16) #0 1117declare <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16>, <2 x i16>) #0 1118declare <3 x i16> @llvm.ssub.sat.v3i16(<3 x i16>, <3 x i16>) #0 1119declare <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16>, <4 x i16>) #0 1120declare i32 @llvm.ssub.sat.i32(i32, i32) #0 1121declare <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32>, <2 x i32>) #0 1122declare <3 x i32> @llvm.ssub.sat.v3i32(<3 x i32>, <3 x i32>) #0 1123declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>) #0 1124declare <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32>, <8 x i32>) #0 1125declare <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32>, <16 x i32>) #0 1126declare i64 @llvm.ssub.sat.i64(i64, i64) #0 1127 ; Attribute group applied to every llvm.ssub.sat declaration in this file. 1128attributes #0 = { nounwind readnone speculatable willreturn } 1129