; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck --check-prefix=GFX6 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck --check-prefix=GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck --check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck --check-prefix=GFX10 %s

; Codegen checks for the llvm.usub.sat.* intrinsics on four targets (tahiti,
; fiji, gfx900, gfx1010), plus three ashr/and bit-hack patterns that the three
; newer targets are expected to select as a clamped 16-bit subtract of 0x8000.
; The check lines are autogenerated; regenerate them with the script named on
; the first line rather than editing them by hand.

; Scalar i8 saturating subtract.
define i8 @v_usubsat_i8(i8 %lhs, i8 %rhs) {
; GFX6-LABEL: v_usubsat_i8:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_movk_i32 s4, 0xff
; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX6-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX6-NEXT:    v_max_u32_e32 v0, v0, v1
; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_usubsat_i8:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_sub_u16_sdwa v0, v0, v1 clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_usubsat_i8:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_sub_u16_sdwa v0, v0, v1 clamp dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_usubsat_i8:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_movk_i32 s4, 0xff
; GFX10-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX10-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX10-NEXT:    v_sub_nc_u16 v0, v0, v1 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call i8 @llvm.usub.sat.i8(i8 %lhs, i8 %rhs)
  ret i8 %result
}

; Scalar i16 saturating subtract.
define i16 @v_usubsat_i16(i16 %lhs, i16 %rhs) {
; GFX6-LABEL: v_usubsat_i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s4, 0xffff
; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX6-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX6-NEXT:    v_max_u32_e32 v0, v0, v1
; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_usubsat_i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_sub_u16_e64 v0, v0, v1 clamp
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_usubsat_i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_sub_u16_e64 v0, v0, v1 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_usubsat_i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_sub_nc_u16 v0, v0, v1 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call i16 @llvm.usub.sat.i16(i16 %lhs, i16 %rhs)
  ret i16 %result
}

; Bit-hack form and(ashr(x, 15), xor(x, 0x8000)). The expected output is a
; clamped subtract of 0x8000 on every run except the tahiti one, which keeps
; the shift/xor/and sequence.
define i16 @usubsat_as_bithack_i16(i16 %x) {
; GFX6-LABEL: usubsat_as_bithack_i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_bfe_i32 v1, v0, 0, 16
; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 15, v1
; GFX6-NEXT:    v_xor_b32_e32 v0, 0xffff8000, v0
; GFX6-NEXT:    v_and_b32_e32 v0, v1, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: usubsat_as_bithack_i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    s_movk_i32 s4, 0x8000
; GFX8-NEXT:    v_sub_u16_e64 v0, v0, s4 clamp
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: usubsat_as_bithack_i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_movk_i32 s4, 0x8000
; GFX9-NEXT:    v_sub_u16_e64 v0, v0, s4 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: usubsat_as_bithack_i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_sub_nc_u16 v0, v0, 0x8000 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %signsplat = ashr i16 %x, 15
  %flipsign = xor i16 %x, 32768
  %result = and i16 %signsplat, %flipsign
  ret i16 %result
}

; Same pattern as above, but the sign bit is flipped with an add of 0x8000
; instead of an xor.
define i16 @usubsat_as_bithack2_i16(i16 %x) {
; GFX6-LABEL: usubsat_as_bithack2_i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_bfe_i32 v1, v0, 0, 16
; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 15, v1
; GFX6-NEXT:    v_add_i32_e32 v0, vcc, 0xffff8000, v0
; GFX6-NEXT:    v_and_b32_e32 v0, v1, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: usubsat_as_bithack2_i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    s_movk_i32 s4, 0x8000
; GFX8-NEXT:    v_sub_u16_e64 v0, v0, s4 clamp
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: usubsat_as_bithack2_i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_movk_i32 s4, 0x8000
; GFX9-NEXT:    v_sub_u16_e64 v0, v0, s4 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: usubsat_as_bithack2_i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_sub_nc_u16 v0, v0, 0x8000 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %signsplat = ashr i16 %x, 15
  %flipsign = add i16 %x, 32768
  %result = and i16 %signsplat, %flipsign
  ret i16 %result
}

; Add-based pattern again, with the operands of the final 'and' swapped.
define i16 @usubsat_as_bithack_commute_i16(i16 %x) {
; GFX6-LABEL: usubsat_as_bithack_commute_i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_bfe_i32 v1, v0, 0, 16
; GFX6-NEXT:    v_ashrrev_i32_e32 v1, 15, v1
; GFX6-NEXT:    v_add_i32_e32 v0, vcc, 0xffff8000, v0
; GFX6-NEXT:    v_and_b32_e32 v0, v0, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: usubsat_as_bithack_commute_i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    s_movk_i32 s4, 0x8000
; GFX8-NEXT:    v_sub_u16_e64 v0, v0, s4 clamp
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: usubsat_as_bithack_commute_i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_movk_i32 s4, 0x8000
; GFX9-NEXT:    v_sub_u16_e64 v0, v0, s4 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: usubsat_as_bithack_commute_i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_sub_nc_u16 v0, v0, 0x8000 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %signsplat = ashr i16 %x, 15
  %flipsign = add i16 %x, 32768
  %result = and i16 %flipsign, %signsplat
  ret i16 %result
}

; Scalar i32 saturating subtract.
define i32 @v_usubsat_i32(i32 %lhs, i32 %rhs) {
; GFX6-LABEL: v_usubsat_i32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_max_u32_e32 v0, v0, v1
; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_usubsat_i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_sub_u32_e64 v0, s[4:5], v0, v1 clamp
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_usubsat_i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_sub_u32_e64 v0, v0, v1 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_usubsat_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_sub_nc_u32_e64 v0, v0, v1 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call i32 @llvm.usub.sat.i32(i32 %lhs, i32 %rhs)
  ret i32 %result
}

; <2 x i16> saturating subtract.
define <2 x i16> @v_usubsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
; GFX6-LABEL: v_usubsat_v2i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s4, 0xffff
; GFX6-NEXT:    v_and_b32_e32 v4, s4, v3
; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX6-NEXT:    v_and_b32_e32 v2, s4, v2
; GFX6-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX6-NEXT:    v_max_u32_e32 v1, v1, v4
; GFX6-NEXT:    v_max_u32_e32 v0, v0, v2
; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_usubsat_v2i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_sub_u16_sdwa v2, v0, v1 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_sub_u16_e64 v0, v0, v1 clamp
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_usubsat_v2i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_sub_u16 v0, v0, v1 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_usubsat_v2i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_pk_sub_u16 v0, v0, v1 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> %lhs, <2 x i16> %rhs)
  ret <2 x i16> %result
}

; <3 x i16> saturating subtract.
define <3 x i16> @v_usubsat_v3i16(<3 x i16> %lhs, <3 x i16> %rhs) {
; GFX6-LABEL: v_usubsat_v3i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s4, 0xffff
; GFX6-NEXT:    v_and_b32_e32 v6, s4, v4
; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX6-NEXT:    v_and_b32_e32 v3, s4, v3
; GFX6-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX6-NEXT:    v_max_u32_e32 v1, v1, v6
; GFX6-NEXT:    v_max_u32_e32 v0, v0, v3
; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
; GFX6-NEXT:    v_and_b32_e32 v5, s4, v5
; GFX6-NEXT:    v_and_b32_e32 v2, s4, v2
; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    v_max_u32_e32 v1, v2, v5
; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v1, v5
; GFX6-NEXT:    v_alignbit_b32 v1, v2, v0, 16
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_usubsat_v3i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_sub_u16_sdwa v4, v0, v2 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_sub_u16_e64 v0, v0, v2 clamp
; GFX8-NEXT:    v_sub_u16_e64 v1, v1, v3 clamp
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_usubsat_v3i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_sub_u16 v1, v1, v3 clamp
; GFX9-NEXT:    v_pk_sub_u16 v0, v0, v2 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_usubsat_v3i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_pk_sub_u16 v0, v0, v2 clamp
; GFX10-NEXT:    v_pk_sub_u16 v1, v1, v3 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call <3 x i16> @llvm.usub.sat.v3i16(<3 x i16> %lhs, <3 x i16> %rhs)
  ret <3 x i16> %result
}

; <4 x i16> saturating subtract; the result is bitcast to <2 x float> before
; it is returned.
define <2 x float> @v_usubsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; GFX6-LABEL: v_usubsat_v4i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_mov_b32 s4, 0xffff
; GFX6-NEXT:    v_and_b32_e32 v9, s4, v5
; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX6-NEXT:    v_and_b32_e32 v4, s4, v4
; GFX6-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX6-NEXT:    v_max_u32_e32 v1, v1, v9
; GFX6-NEXT:    v_max_u32_e32 v0, v0, v4
; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
; GFX6-NEXT:    v_and_b32_e32 v8, s4, v7
; GFX6-NEXT:    v_and_b32_e32 v3, s4, v3
; GFX6-NEXT:    v_and_b32_e32 v6, s4, v6
; GFX6-NEXT:    v_and_b32_e32 v2, s4, v2
; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    v_max_u32_e32 v1, v2, v6
; GFX6-NEXT:    v_max_u32_e32 v2, v3, v8
; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v7
; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v6
; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_usubsat_v4i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_sub_u16_sdwa v4, v0, v2 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_sub_u16_e64 v0, v0, v2 clamp
; GFX8-NEXT:    v_sub_u16_sdwa v2, v1, v3 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_sub_u16_e64 v1, v1, v3 clamp
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
; GFX8-NEXT:    v_or_b32_e32 v1, v1, v2
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_usubsat_v4i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_sub_u16 v0, v0, v2 clamp
; GFX9-NEXT:    v_pk_sub_u16 v1, v1, v3 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_usubsat_v4i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_pk_sub_u16 v0, v0, v2 clamp
; GFX10-NEXT:    v_pk_sub_u16 v1, v1, v3 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  %cast = bitcast <4 x i16> %result to <2 x float>
  ret <2 x float> %cast
}

; <2 x i32> saturating subtract.
define <2 x i32> @v_usubsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; GFX6-LABEL: v_usubsat_v2i32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_max_u32_e32 v0, v0, v2
; GFX6-NEXT:    v_max_u32_e32 v1, v1, v3
; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v2
; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v3
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_usubsat_v2i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_sub_u32_e64 v0, s[4:5], v0, v2 clamp
; GFX8-NEXT:    v_sub_u32_e64 v1, s[4:5], v1, v3 clamp
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_usubsat_v2i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_sub_u32_e64 v0, v0, v2 clamp
; GFX9-NEXT:    v_sub_u32_e64 v1, v1, v3 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_usubsat_v2i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_sub_nc_u32_e64 v0, v0, v2 clamp
; GFX10-NEXT:    v_sub_nc_u32_e64 v1, v1, v3 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %result
}

; <3 x i32> saturating subtract.
define <3 x i32> @v_usubsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
; GFX6-LABEL: v_usubsat_v3i32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_max_u32_e32 v0, v0, v3
; GFX6-NEXT:    v_max_u32_e32 v1, v1, v4
; GFX6-NEXT:    v_max_u32_e32 v2, v2, v5
; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v3
; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v4
; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v5
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_usubsat_v3i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_sub_u32_e64 v0, s[4:5], v0, v3 clamp
; GFX8-NEXT:    v_sub_u32_e64 v1, s[4:5], v1, v4 clamp
; GFX8-NEXT:    v_sub_u32_e64 v2, s[4:5], v2, v5 clamp
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_usubsat_v3i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_sub_u32_e64 v0, v0, v3 clamp
; GFX9-NEXT:    v_sub_u32_e64 v1, v1, v4 clamp
; GFX9-NEXT:    v_sub_u32_e64 v2, v2, v5 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_usubsat_v3i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_sub_nc_u32_e64 v0, v0, v3 clamp
; GFX10-NEXT:    v_sub_nc_u32_e64 v1, v1, v4 clamp
; GFX10-NEXT:    v_sub_nc_u32_e64 v2, v2, v5 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call <3 x i32> @llvm.usub.sat.v3i32(<3 x i32> %lhs, <3 x i32> %rhs)
  ret <3 x i32> %result
}

; <4 x i32> saturating subtract.
define <4 x i32> @v_usubsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; GFX6-LABEL: v_usubsat_v4i32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_max_u32_e32 v0, v0, v4
; GFX6-NEXT:    v_max_u32_e32 v1, v1, v5
; GFX6-NEXT:    v_max_u32_e32 v2, v2, v6
; GFX6-NEXT:    v_max_u32_e32 v3, v3, v7
; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v4
; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v5
; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v6
; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, v3, v7
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_usubsat_v4i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_sub_u32_e64 v0, s[4:5], v0, v4 clamp
; GFX8-NEXT:    v_sub_u32_e64 v1, s[4:5], v1, v5 clamp
; GFX8-NEXT:    v_sub_u32_e64 v2, s[4:5], v2, v6 clamp
; GFX8-NEXT:    v_sub_u32_e64 v3, s[4:5], v3, v7 clamp
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_usubsat_v4i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_sub_u32_e64 v0, v0, v4 clamp
; GFX9-NEXT:    v_sub_u32_e64 v1, v1, v5 clamp
; GFX9-NEXT:    v_sub_u32_e64 v2, v2, v6 clamp
; GFX9-NEXT:    v_sub_u32_e64 v3, v3, v7 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_usubsat_v4i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_sub_nc_u32_e64 v0, v0, v4 clamp
; GFX10-NEXT:    v_sub_nc_u32_e64 v1, v1, v5 clamp
; GFX10-NEXT:    v_sub_nc_u32_e64 v2, v2, v6 clamp
; GFX10-NEXT:    v_sub_nc_u32_e64 v3, v3, v7 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i32> %result
}

; <8 x i32> saturating subtract.
define <8 x i32> @v_usubsat_v8i32(<8 x i32> %lhs, <8 x i32> %rhs) {
; GFX6-LABEL: v_usubsat_v8i32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_max_u32_e32 v0, v0, v8
; GFX6-NEXT:    v_max_u32_e32 v1, v1, v9
; GFX6-NEXT:    v_max_u32_e32 v2, v2, v10
; GFX6-NEXT:    v_max_u32_e32 v3, v3, v11
; GFX6-NEXT:    v_max_u32_e32 v4, v4, v12
; GFX6-NEXT:    v_max_u32_e32 v5, v5, v13
; GFX6-NEXT:    v_max_u32_e32 v6, v6, v14
; GFX6-NEXT:    v_max_u32_e32 v7, v7, v15
; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v8
; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v9
; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v10
; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, v3, v11
; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, v4, v12
; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, v5, v13
; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, v6, v14
; GFX6-NEXT:    v_sub_i32_e32 v7, vcc, v7, v15
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_usubsat_v8i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_sub_u32_e64 v0, s[4:5], v0, v8 clamp
; GFX8-NEXT:    v_sub_u32_e64 v1, s[4:5], v1, v9 clamp
; GFX8-NEXT:    v_sub_u32_e64 v2, s[4:5], v2, v10 clamp
; GFX8-NEXT:    v_sub_u32_e64 v3, s[4:5], v3, v11 clamp
; GFX8-NEXT:    v_sub_u32_e64 v4, s[4:5], v4, v12 clamp
; GFX8-NEXT:    v_sub_u32_e64 v5, s[4:5], v5, v13 clamp
; GFX8-NEXT:    v_sub_u32_e64 v6, s[4:5], v6, v14 clamp
; GFX8-NEXT:    v_sub_u32_e64 v7, s[4:5], v7, v15 clamp
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_usubsat_v8i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_sub_u32_e64 v0, v0, v8 clamp
; GFX9-NEXT:    v_sub_u32_e64 v1, v1, v9 clamp
; GFX9-NEXT:    v_sub_u32_e64 v2, v2, v10 clamp
; GFX9-NEXT:    v_sub_u32_e64 v3, v3, v11 clamp
; GFX9-NEXT:    v_sub_u32_e64 v4, v4, v12 clamp
; GFX9-NEXT:    v_sub_u32_e64 v5, v5, v13 clamp
; GFX9-NEXT:    v_sub_u32_e64 v6, v6, v14 clamp
; GFX9-NEXT:    v_sub_u32_e64 v7, v7, v15 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_usubsat_v8i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_sub_nc_u32_e64 v0, v0, v8 clamp
; GFX10-NEXT:    v_sub_nc_u32_e64 v1, v1, v9 clamp
; GFX10-NEXT:    v_sub_nc_u32_e64 v2, v2, v10 clamp
; GFX10-NEXT:    v_sub_nc_u32_e64 v3, v3, v11 clamp
; GFX10-NEXT:    v_sub_nc_u32_e64 v4, v4, v12 clamp
; GFX10-NEXT:    v_sub_nc_u32_e64 v5, v5, v13 clamp
; GFX10-NEXT:    v_sub_nc_u32_e64 v6, v6, v14 clamp
; GFX10-NEXT:    v_sub_nc_u32_e64 v7, v7, v15 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> %lhs, <8 x i32> %rhs)
  ret <8 x i32> %result
}

; <16 x i32> saturating subtract. In the expected code for every run, the value
; subtracted from v15 is first fetched with a buffer_load_dword and guarded by
; an s_waitcnt vmcnt(0).
define <16 x i32> @v_usubsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
; GFX6-LABEL: v_usubsat_v16i32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_max_u32_e32 v0, v0, v16
; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, v0, v16
; GFX6-NEXT:    buffer_load_dword v16, off, s[0:3], s32
; GFX6-NEXT:    v_max_u32_e32 v1, v1, v17
; GFX6-NEXT:    v_max_u32_e32 v2, v2, v18
; GFX6-NEXT:    v_max_u32_e32 v3, v3, v19
; GFX6-NEXT:    v_max_u32_e32 v4, v4, v20
; GFX6-NEXT:    v_max_u32_e32 v5, v5, v21
; GFX6-NEXT:    v_max_u32_e32 v6, v6, v22
; GFX6-NEXT:    v_max_u32_e32 v7, v7, v23
; GFX6-NEXT:    v_max_u32_e32 v8, v8, v24
; GFX6-NEXT:    v_max_u32_e32 v9, v9, v25
; GFX6-NEXT:    v_max_u32_e32 v10, v10, v26
; GFX6-NEXT:    v_max_u32_e32 v11, v11, v27
; GFX6-NEXT:    v_max_u32_e32 v12, v12, v28
; GFX6-NEXT:    v_max_u32_e32 v13, v13, v29
; GFX6-NEXT:    v_max_u32_e32 v14, v14, v30
; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v17
; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v18
; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, v3, v19
; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, v4, v20
; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, v5, v21
; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, v6, v22
; GFX6-NEXT:    v_sub_i32_e32 v7, vcc, v7, v23
; GFX6-NEXT:    v_sub_i32_e32 v8, vcc, v8, v24
; GFX6-NEXT:    v_sub_i32_e32 v9, vcc, v9, v25
; GFX6-NEXT:    v_sub_i32_e32 v10, vcc, v10, v26
; GFX6-NEXT:    v_sub_i32_e32 v11, vcc, v11, v27
; GFX6-NEXT:    v_sub_i32_e32 v12, vcc, v12, v28
; GFX6-NEXT:    v_sub_i32_e32 v13, vcc, v13, v29
; GFX6-NEXT:    v_sub_i32_e32 v14, vcc, v14, v30
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    v_max_u32_e32 v15, v15, v16
; GFX6-NEXT:    v_sub_i32_e32 v15, vcc, v15, v16
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_usubsat_v16i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_sub_u32_e64 v0, s[4:5], v0, v16 clamp
; GFX8-NEXT:    buffer_load_dword v16, off, s[0:3], s32
; GFX8-NEXT:    v_sub_u32_e64 v1, s[4:5], v1, v17 clamp
; GFX8-NEXT:    v_sub_u32_e64 v2, s[4:5], v2, v18 clamp
; GFX8-NEXT:    v_sub_u32_e64 v3, s[4:5], v3, v19 clamp
; GFX8-NEXT:    v_sub_u32_e64 v4, s[4:5], v4, v20 clamp
; GFX8-NEXT:    v_sub_u32_e64 v5, s[4:5], v5, v21 clamp
; GFX8-NEXT:    v_sub_u32_e64 v6, s[4:5], v6, v22 clamp
; GFX8-NEXT:    v_sub_u32_e64 v7, s[4:5], v7, v23 clamp
; GFX8-NEXT:    v_sub_u32_e64 v8, s[4:5], v8, v24 clamp
; GFX8-NEXT:    v_sub_u32_e64 v9, s[4:5], v9, v25 clamp
; GFX8-NEXT:    v_sub_u32_e64 v10, s[4:5], v10, v26 clamp
; GFX8-NEXT:    v_sub_u32_e64 v11, s[4:5], v11, v27 clamp
; GFX8-NEXT:    v_sub_u32_e64 v12, s[4:5], v12, v28 clamp
; GFX8-NEXT:    v_sub_u32_e64 v13, s[4:5], v13, v29 clamp
; GFX8-NEXT:    v_sub_u32_e64 v14, s[4:5], v14, v30 clamp
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_sub_u32_e64 v15, s[4:5], v15, v16 clamp
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_usubsat_v16i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_sub_u32_e64 v0, v0, v16 clamp
; GFX9-NEXT:    buffer_load_dword v16, off, s[0:3], s32
; GFX9-NEXT:    v_sub_u32_e64 v1, v1, v17 clamp
; GFX9-NEXT:    v_sub_u32_e64 v2, v2, v18 clamp
; GFX9-NEXT:    v_sub_u32_e64 v3, v3, v19 clamp
; GFX9-NEXT:    v_sub_u32_e64 v4, v4, v20 clamp
; GFX9-NEXT:    v_sub_u32_e64 v5, v5, v21 clamp
; GFX9-NEXT:    v_sub_u32_e64 v6, v6, v22 clamp
; GFX9-NEXT:    v_sub_u32_e64 v7, v7, v23 clamp
; GFX9-NEXT:    v_sub_u32_e64 v8, v8, v24 clamp
; GFX9-NEXT:    v_sub_u32_e64 v9, v9, v25 clamp
; GFX9-NEXT:    v_sub_u32_e64 v10, v10, v26 clamp
; GFX9-NEXT:    v_sub_u32_e64 v11, v11, v27 clamp
; GFX9-NEXT:    v_sub_u32_e64 v12, v12, v28 clamp
; GFX9-NEXT:    v_sub_u32_e64 v13, v13, v29 clamp
; GFX9-NEXT:    v_sub_u32_e64 v14, v14, v30 clamp
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_sub_u32_e64 v15, v15, v16 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_usubsat_v16i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    buffer_load_dword v31, off, s[0:3], s32
; GFX10-NEXT:    v_sub_nc_u32_e64 v0, v0, v16 clamp
; GFX10-NEXT:    v_sub_nc_u32_e64 v1, v1, v17 clamp
; GFX10-NEXT:    v_sub_nc_u32_e64 v2, v2, v18 clamp
; GFX10-NEXT:    v_sub_nc_u32_e64 v3, v3, v19 clamp
; GFX10-NEXT:    v_sub_nc_u32_e64 v4, v4, v20 clamp
; GFX10-NEXT:    v_sub_nc_u32_e64 v5, v5, v21 clamp
; GFX10-NEXT:    v_sub_nc_u32_e64 v6, v6, v22 clamp
; GFX10-NEXT:    v_sub_nc_u32_e64 v7, v7, v23 clamp
; GFX10-NEXT:    v_sub_nc_u32_e64 v8, v8, v24 clamp
; GFX10-NEXT:    v_sub_nc_u32_e64 v9, v9, v25 clamp
; GFX10-NEXT:    v_sub_nc_u32_e64 v10, v10, v26 clamp
; GFX10-NEXT:    v_sub_nc_u32_e64 v11, v11, v27 clamp
; GFX10-NEXT:    v_sub_nc_u32_e64 v12, v12, v28 clamp
; GFX10-NEXT:    v_sub_nc_u32_e64 v13, v13, v29 clamp
; GFX10-NEXT:    v_sub_nc_u32_e64 v14, v14, v30 clamp
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_sub_nc_u32_e64 v15, v15, v31 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> %lhs, <16 x i32> %rhs)
  ret <16 x i32> %result
}


; Scalar i64 saturating subtract. No run is expected to use a clamp here; each
; expects a subtract-with-borrow pair, an unsigned 64-bit compare and two
; selects against zero.
define i64 @v_usubsat_i64(i64 %lhs, i64 %rhs) {
; GFX6-LABEL: v_usubsat_i64:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v0, v2
; GFX6-NEXT:    v_subb_u32_e32 v3, vcc, v1, v3, vcc
; GFX6-NEXT:    v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
; GFX6-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX6-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_usubsat_i64:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, v0, v2
; GFX8-NEXT:    v_subb_u32_e32 v3, vcc, v1, v3, vcc
; GFX8-NEXT:    v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX8-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_usubsat_i64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_sub_co_u32_e32 v2, vcc, v0, v2
; GFX9-NEXT:    v_subb_co_u32_e32 v3, vcc, v1, v3, vcc
; GFX9-NEXT:    v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc
; GFX9-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_usubsat_i64:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_sub_co_u32 v2, vcc_lo, v0, v2
; GFX10-NEXT:    v_sub_co_ci_u32_e32 v3, vcc_lo, v1, v3, vcc_lo
; GFX10-NEXT:    v_cmp_gt_u64_e32 vcc_lo, v[2:3], v[0:1]
; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, 0, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, 0, vcc_lo
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call i64 @llvm.usub.sat.i64(i64 %lhs, i64 %rhs)
  ret i64 %result
}

; Intrinsic declarations for every overload called above.
declare i8 @llvm.usub.sat.i8(i8, i8) #0
declare i16 @llvm.usub.sat.i16(i16, i16) #0
declare <2 x i16> @llvm.usub.sat.v2i16(<2 x i16>, <2 x i16>) #0
declare <3 x i16> @llvm.usub.sat.v3i16(<3 x i16>, <3 x i16>) #0
declare <4 x i16> @llvm.usub.sat.v4i16(<4 x i16>, <4 x i16>) #0
declare i32 @llvm.usub.sat.i32(i32, i32) #0
declare <2 x i32> @llvm.usub.sat.v2i32(<2 x i32>, <2 x i32>) #0
declare <3 x i32> @llvm.usub.sat.v3i32(<3 x i32>, <3 x i32>) #0
declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>) #0
declare <8 x i32> @llvm.usub.sat.v8i32(<8 x i32>, <8 x i32>) #0
declare <16 x i32> @llvm.usub.sat.v16i32(<16 x i32>, <16 x i32>) #0
declare i64 @llvm.usub.sat.i64(i64, i64) #0

attributes #0 = { nounwind readnone speculatable willreturn }