; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck --check-prefix=GFX6 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck --check-prefix=GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck --check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck --check-prefix=GFX10 %s

define i8 @v_uaddsat_i8(i8 %lhs, i8 %rhs) {
; GFX6-LABEL: v_uaddsat_i8:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_and_b32_e32 v1, 0xff, v1
; GFX6-NEXT:    v_and_b32_e32 v0, 0xff, v0
; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
; GFX6-NEXT:    v_min_u32_e32 v0, 0xff, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_uaddsat_i8:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX8-NEXT:    v_min_u16_e32 v0, 0xff, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uaddsat_i8:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_u16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0
; GFX9-NEXT:    v_min_u16_e32 v0, 0xff, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_uaddsat_i8:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_and_b32_e32 v1, 0xff, v1
; GFX10-NEXT:    v_and_b32_e32 v0, 0xff, v0
; GFX10-NEXT:    v_add_nc_u16 v0, v0, v1
; GFX10-NEXT:    v_min_u16 v0, 0xff, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call i8 @llvm.uadd.sat.i8(i8 %lhs, i8 %rhs)
  ret i8 %result
}

define i16 @v_uaddsat_i16(i16 %lhs, i16 %rhs) {
; GFX6-LABEL: v_uaddsat_i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
; GFX6-NEXT:    v_min_u32_e32 v0, 0xffff, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_uaddsat_i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u16_e64 v0, v0, v1 clamp
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uaddsat_i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_u16_e64 v0, v0, v1 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_uaddsat_i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_nc_u16 v0, v0, v1 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call i16 @llvm.uadd.sat.i16(i16 %lhs, i16 %rhs)
  ret i16 %result
}

define i32 @v_uaddsat_i32(i32 %lhs, i32 %rhs) {
; GFX6-LABEL: v_uaddsat_i32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_not_b32_e32 v2, v1
; GFX6-NEXT:    v_min_u32_e32 v0, v0, v2
; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_uaddsat_i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u32_e64 v0, s[4:5], v0, v1 clamp
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uaddsat_i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_u32_e64 v0, v0, v1 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_uaddsat_i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_nc_u32_e64 v0, v0, v1 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call i32 @llvm.uadd.sat.i32(i32 %lhs, i32 %rhs)
  ret i32 %result
}

define <2 x i16> @v_uaddsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) {
; GFX6-LABEL: v_uaddsat_v2i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_and_b32_e32 v3, 0xffff, v3
; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
; GFX6-NEXT:    v_min_u32_e32 v1, 0xffff, v1
; GFX6-NEXT:    v_min_u32_e32 v0, 0xffff, v0
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_uaddsat_v2i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u16_sdwa v2, v0, v1 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_add_u16_e64 v0, v0, v1 clamp
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v2
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uaddsat_v2i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_add_u16 v0, v0, v1 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_uaddsat_v2i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_pk_add_u16 v0, v0, v1 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> %lhs, <2 x i16> %rhs)
  ret <2 x i16> %result
}

define <3 x i16> @v_uaddsat_v3i16(<3 x i16> %lhs, <3 x i16> %rhs) {
; GFX6-LABEL: v_uaddsat_v3i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v4
; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT:    v_and_b32_e32 v5, 0xffff, v5
; GFX6-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX6-NEXT:    v_and_b32_e32 v3, 0xffff, v3
; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v3
; GFX6-NEXT:    v_min_u32_e32 v1, 0xffff, v1
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
; GFX6-NEXT:    v_min_u32_e32 v0, 0xffff, v0
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_min_u32_e32 v3, 0xffff, v2
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    v_or_b32_e32 v2, 0xffff0000, v3
; GFX6-NEXT:    v_alignbit_b32 v1, v3, v1, 16
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_uaddsat_v3i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u16_sdwa v4, v0, v2 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_add_u16_e64 v0, v0, v2 clamp
; GFX8-NEXT:    v_add_u16_e64 v1, v1, v3 clamp
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uaddsat_v3i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_add_u16 v1, v1, v3 clamp
; GFX9-NEXT:    v_pk_add_u16 v0, v0, v2 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_uaddsat_v3i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_pk_add_u16 v0, v0, v2 clamp
; GFX10-NEXT:    v_pk_add_u16 v1, v1, v3 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call <3 x i16> @llvm.uadd.sat.v3i16(<3 x i16> %lhs, <3 x i16> %rhs)
  ret <3 x i16> %result
}

define <2 x float> @v_uaddsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
; GFX6-LABEL: v_uaddsat_v4i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_and_b32_e32 v5, 0xffff, v5
; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v4
; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
; GFX6-NEXT:    v_min_u32_e32 v1, 0xffff, v1
; GFX6-NEXT:    v_and_b32_e32 v7, 0xffff, v7
; GFX6-NEXT:    v_and_b32_e32 v3, 0xffff, v3
; GFX6-NEXT:    v_and_b32_e32 v6, 0xffff, v6
; GFX6-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX6-NEXT:    v_min_u32_e32 v0, 0xffff, v0
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v2, v6
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v3, v7
; GFX6-NEXT:    v_min_u32_e32 v2, 0xffff, v2
; GFX6-NEXT:    v_min_u32_e32 v1, 0xffff, v1
; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_uaddsat_v4i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u16_sdwa v4, v0, v2 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_add_u16_e64 v0, v0, v2 clamp
; GFX8-NEXT:    v_add_u16_sdwa v2, v1, v3 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_add_u16_e64 v1, v1, v3 clamp
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v4
; GFX8-NEXT:    v_or_b32_e32 v1, v1, v2
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uaddsat_v4i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_add_u16 v0, v0, v2 clamp
; GFX9-NEXT:    v_pk_add_u16 v1, v1, v3 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_uaddsat_v4i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_pk_add_u16 v0, v0, v2 clamp
; GFX10-NEXT:    v_pk_add_u16 v1, v1, v3 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
  %cast = bitcast <4 x i16> %result to <2 x float>
  ret <2 x float> %cast
}

define <2 x i32> @v_uaddsat_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; GFX6-LABEL: v_uaddsat_v2i32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_not_b32_e32 v4, v2
; GFX6-NEXT:    v_min_u32_e32 v0, v0, v4
; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
; GFX6-NEXT:    v_not_b32_e32 v2, v3
; GFX6-NEXT:    v_min_u32_e32 v1, v1, v2
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_uaddsat_v2i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u32_e64 v0, s[4:5], v0, v2 clamp
; GFX8-NEXT:    v_add_u32_e64 v1, s[4:5], v1, v3 clamp
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uaddsat_v2i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_u32_e64 v0, v0, v2 clamp
; GFX9-NEXT:    v_add_u32_e64 v1, v1, v3 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_uaddsat_v2i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_nc_u32_e64 v0, v0, v2 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v1, v1, v3 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %result
}

define <3 x i32> @v_uaddsat_v3i32(<3 x i32> %lhs, <3 x i32> %rhs) {
; GFX6-LABEL: v_uaddsat_v3i32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_not_b32_e32 v6, v3
; GFX6-NEXT:    v_min_u32_e32 v0, v0, v6
; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v3
; GFX6-NEXT:    v_not_b32_e32 v3, v4
; GFX6-NEXT:    v_min_u32_e32 v1, v1, v3
; GFX6-NEXT:    v_not_b32_e32 v3, v5
; GFX6-NEXT:    v_min_u32_e32 v2, v2, v3
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_uaddsat_v3i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u32_e64 v0, s[4:5], v0, v3 clamp
; GFX8-NEXT:    v_add_u32_e64 v1, s[4:5], v1, v4 clamp
; GFX8-NEXT:    v_add_u32_e64 v2, s[4:5], v2, v5 clamp
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uaddsat_v3i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_u32_e64 v0, v0, v3 clamp
; GFX9-NEXT:    v_add_u32_e64 v1, v1, v4 clamp
; GFX9-NEXT:    v_add_u32_e64 v2, v2, v5 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_uaddsat_v3i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_nc_u32_e64 v0, v0, v3 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v1, v1, v4 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v2, v2, v5 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call <3 x i32> @llvm.uadd.sat.v3i32(<3 x i32> %lhs, <3 x i32> %rhs)
  ret <3 x i32> %result
}

define <4 x i32> @v_uaddsat_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; GFX6-LABEL: v_uaddsat_v4i32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_not_b32_e32 v8, v4
; GFX6-NEXT:    v_min_u32_e32 v0, v0, v8
; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v4
; GFX6-NEXT:    v_not_b32_e32 v4, v5
; GFX6-NEXT:    v_min_u32_e32 v1, v1, v4
; GFX6-NEXT:    v_not_b32_e32 v4, v6
; GFX6-NEXT:    v_min_u32_e32 v2, v2, v4
; GFX6-NEXT:    v_not_b32_e32 v4, v7
; GFX6-NEXT:    v_min_u32_e32 v3, v3, v4
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v1, v5
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v6
; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_uaddsat_v4i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u32_e64 v0, s[4:5], v0, v4 clamp
; GFX8-NEXT:    v_add_u32_e64 v1, s[4:5], v1, v5 clamp
; GFX8-NEXT:    v_add_u32_e64 v2, s[4:5], v2, v6 clamp
; GFX8-NEXT:    v_add_u32_e64 v3, s[4:5], v3, v7 clamp
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uaddsat_v4i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_u32_e64 v0, v0, v4 clamp
; GFX9-NEXT:    v_add_u32_e64 v1, v1, v5 clamp
; GFX9-NEXT:    v_add_u32_e64 v2, v2, v6 clamp
; GFX9-NEXT:    v_add_u32_e64 v3, v3, v7 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_uaddsat_v4i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_nc_u32_e64 v0, v0, v4 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v1, v1, v5 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v2, v2, v6 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v3, v3, v7 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i32> %result
}

define <8 x i32> @v_uaddsat_v8i32(<8 x i32> %lhs, <8 x i32> %rhs) {
; GFX6-LABEL: v_uaddsat_v8i32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_not_b32_e32 v16, v8
; GFX6-NEXT:    v_min_u32_e32 v0, v0, v16
; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v8
; GFX6-NEXT:    v_not_b32_e32 v8, v9
; GFX6-NEXT:    v_min_u32_e32 v1, v1, v8
; GFX6-NEXT:    v_not_b32_e32 v8, v10
; GFX6-NEXT:    v_min_u32_e32 v2, v2, v8
; GFX6-NEXT:    v_not_b32_e32 v8, v11
; GFX6-NEXT:    v_min_u32_e32 v3, v3, v8
; GFX6-NEXT:    v_not_b32_e32 v8, v12
; GFX6-NEXT:    v_min_u32_e32 v4, v4, v8
; GFX6-NEXT:    v_not_b32_e32 v8, v13
; GFX6-NEXT:    v_min_u32_e32 v5, v5, v8
; GFX6-NEXT:    v_not_b32_e32 v8, v14
; GFX6-NEXT:    v_min_u32_e32 v6, v6, v8
; GFX6-NEXT:    v_not_b32_e32 v8, v15
; GFX6-NEXT:    v_min_u32_e32 v7, v7, v8
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v1, v9
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v10
; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v11
; GFX6-NEXT:    v_add_i32_e32 v4, vcc, v4, v12
; GFX6-NEXT:    v_add_i32_e32 v5, vcc, v5, v13
; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v6, v14
; GFX6-NEXT:    v_add_i32_e32 v7, vcc, v7, v15
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_uaddsat_v8i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u32_e64 v0, s[4:5], v0, v8 clamp
; GFX8-NEXT:    v_add_u32_e64 v1, s[4:5], v1, v9 clamp
; GFX8-NEXT:    v_add_u32_e64 v2, s[4:5], v2, v10 clamp
; GFX8-NEXT:    v_add_u32_e64 v3, s[4:5], v3, v11 clamp
; GFX8-NEXT:    v_add_u32_e64 v4, s[4:5], v4, v12 clamp
; GFX8-NEXT:    v_add_u32_e64 v5, s[4:5], v5, v13 clamp
; GFX8-NEXT:    v_add_u32_e64 v6, s[4:5], v6, v14 clamp
; GFX8-NEXT:    v_add_u32_e64 v7, s[4:5], v7, v15 clamp
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uaddsat_v8i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_u32_e64 v0, v0, v8 clamp
; GFX9-NEXT:    v_add_u32_e64 v1, v1, v9 clamp
; GFX9-NEXT:    v_add_u32_e64 v2, v2, v10 clamp
; GFX9-NEXT:    v_add_u32_e64 v3, v3, v11 clamp
; GFX9-NEXT:    v_add_u32_e64 v4, v4, v12 clamp
; GFX9-NEXT:    v_add_u32_e64 v5, v5, v13 clamp
; GFX9-NEXT:    v_add_u32_e64 v6, v6, v14 clamp
; GFX9-NEXT:    v_add_u32_e64 v7, v7, v15 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_uaddsat_v8i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_nc_u32_e64 v0, v0, v8 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v1, v1, v9 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v2, v2, v10 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v3, v3, v11 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v4, v4, v12 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v5, v5, v13 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v6, v6, v14 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v7, v7, v15 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> %lhs, <8 x i32> %rhs)
  ret <8 x i32> %result
}

define <16 x i32> @v_uaddsat_v16i32(<16 x i32> %lhs, <16 x i32> %rhs) {
; GFX6-LABEL: v_uaddsat_v16i32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_not_b32_e32 v31, v16
; GFX6-NEXT:    v_min_u32_e32 v0, v0, v31
; GFX6-NEXT:    buffer_load_dword v31, off, s[0:3], s32
; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v16
; GFX6-NEXT:    v_not_b32_e32 v16, v17
; GFX6-NEXT:    v_min_u32_e32 v1, v1, v16
; GFX6-NEXT:    v_not_b32_e32 v16, v18
; GFX6-NEXT:    v_min_u32_e32 v2, v2, v16
; GFX6-NEXT:    v_not_b32_e32 v16, v19
; GFX6-NEXT:    v_min_u32_e32 v3, v3, v16
; GFX6-NEXT:    v_not_b32_e32 v16, v20
; GFX6-NEXT:    v_min_u32_e32 v4, v4, v16
; GFX6-NEXT:    v_not_b32_e32 v16, v21
; GFX6-NEXT:    v_min_u32_e32 v5, v5, v16
; GFX6-NEXT:    v_not_b32_e32 v16, v22
; GFX6-NEXT:    v_min_u32_e32 v6, v6, v16
; GFX6-NEXT:    v_not_b32_e32 v16, v23
; GFX6-NEXT:    v_min_u32_e32 v7, v7, v16
; GFX6-NEXT:    v_not_b32_e32 v16, v24
; GFX6-NEXT:    v_min_u32_e32 v8, v8, v16
; GFX6-NEXT:    v_not_b32_e32 v16, v25
; GFX6-NEXT:    v_min_u32_e32 v9, v9, v16
; GFX6-NEXT:    v_not_b32_e32 v16, v26
; GFX6-NEXT:    v_min_u32_e32 v10, v10, v16
; GFX6-NEXT:    v_not_b32_e32 v16, v27
; GFX6-NEXT:    v_min_u32_e32 v11, v11, v16
; GFX6-NEXT:    v_not_b32_e32 v16, v28
; GFX6-NEXT:    v_min_u32_e32 v12, v12, v16
; GFX6-NEXT:    v_not_b32_e32 v16, v29
; GFX6-NEXT:    v_min_u32_e32 v13, v13, v16
; GFX6-NEXT:    v_not_b32_e32 v16, v30
; GFX6-NEXT:    v_min_u32_e32 v14, v14, v16
; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v1, v17
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v18
; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v19
; GFX6-NEXT:    v_add_i32_e32 v4, vcc, v4, v20
; GFX6-NEXT:    v_add_i32_e32 v5, vcc, v5, v21
; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v6, v22
; GFX6-NEXT:    v_add_i32_e32 v7, vcc, v7, v23
; GFX6-NEXT:    v_add_i32_e32 v8, vcc, v8, v24
; GFX6-NEXT:    v_add_i32_e32 v9, vcc, v9, v25
; GFX6-NEXT:    v_add_i32_e32 v10, vcc, v10, v26
; GFX6-NEXT:    v_add_i32_e32 v11, vcc, v11, v27
; GFX6-NEXT:    v_add_i32_e32 v12, vcc, v12, v28
; GFX6-NEXT:    v_add_i32_e32 v13, vcc, v13, v29
; GFX6-NEXT:    v_add_i32_e32 v14, vcc, v14, v30
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    v_not_b32_e32 v16, v31
; GFX6-NEXT:    v_min_u32_e32 v15, v15, v16
; GFX6-NEXT:    v_add_i32_e32 v15, vcc, v15, v31
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_uaddsat_v16i32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u32_e64 v0, s[4:5], v0, v16 clamp
; GFX8-NEXT:    buffer_load_dword v16, off, s[0:3], s32
; GFX8-NEXT:    v_add_u32_e64 v1, s[4:5], v1, v17 clamp
; GFX8-NEXT:    v_add_u32_e64 v2, s[4:5], v2, v18 clamp
; GFX8-NEXT:    v_add_u32_e64 v3, s[4:5], v3, v19 clamp
; GFX8-NEXT:    v_add_u32_e64 v4, s[4:5], v4, v20 clamp
; GFX8-NEXT:    v_add_u32_e64 v5, s[4:5], v5, v21 clamp
; GFX8-NEXT:    v_add_u32_e64 v6, s[4:5], v6, v22 clamp
; GFX8-NEXT:    v_add_u32_e64 v7, s[4:5], v7, v23 clamp
; GFX8-NEXT:    v_add_u32_e64 v8, s[4:5], v8, v24 clamp
; GFX8-NEXT:    v_add_u32_e64 v9, s[4:5], v9, v25 clamp
; GFX8-NEXT:    v_add_u32_e64 v10, s[4:5], v10, v26 clamp
; GFX8-NEXT:    v_add_u32_e64 v11, s[4:5], v11, v27 clamp
; GFX8-NEXT:    v_add_u32_e64 v12, s[4:5], v12, v28 clamp
; GFX8-NEXT:    v_add_u32_e64 v13, s[4:5], v13, v29 clamp
; GFX8-NEXT:    v_add_u32_e64 v14, s[4:5], v14, v30 clamp
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    v_add_u32_e64 v15, s[4:5], v15, v16 clamp
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uaddsat_v16i32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_u32_e64 v0, v0, v16 clamp
; GFX9-NEXT:    buffer_load_dword v16, off, s[0:3], s32
; GFX9-NEXT:    v_add_u32_e64 v1, v1, v17 clamp
; GFX9-NEXT:    v_add_u32_e64 v2, v2, v18 clamp
; GFX9-NEXT:    v_add_u32_e64 v3, v3, v19 clamp
; GFX9-NEXT:    v_add_u32_e64 v4, v4, v20 clamp
; GFX9-NEXT:    v_add_u32_e64 v5, v5, v21 clamp
; GFX9-NEXT:    v_add_u32_e64 v6, v6, v22 clamp
; GFX9-NEXT:    v_add_u32_e64 v7, v7, v23 clamp
; GFX9-NEXT:    v_add_u32_e64 v8, v8, v24 clamp
; GFX9-NEXT:    v_add_u32_e64 v9, v9, v25 clamp
; GFX9-NEXT:    v_add_u32_e64 v10, v10, v26 clamp
; GFX9-NEXT:    v_add_u32_e64 v11, v11, v27 clamp
; GFX9-NEXT:    v_add_u32_e64 v12, v12, v28 clamp
; GFX9-NEXT:    v_add_u32_e64 v13, v13, v29 clamp
; GFX9-NEXT:    v_add_u32_e64 v14, v14, v30 clamp
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_add_u32_e64 v15, v15, v16 clamp
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_uaddsat_v16i32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    buffer_load_dword v31, off, s[0:3], s32
; GFX10-NEXT:    v_add_nc_u32_e64 v0, v0, v16 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v1, v1, v17 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v2, v2, v18 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v3, v3, v19 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v4, v4, v20 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v5, v5, v21 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v6, v6, v22 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v7, v7, v23 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v8, v8, v24 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v9, v9, v25 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v10, v10, v26 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v11, v11, v27 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v12, v12, v28 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v13, v13, v29 clamp
; GFX10-NEXT:    v_add_nc_u32_e64 v14, v14, v30 clamp
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_add_nc_u32_e64 v15, v15, v31 clamp
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> %lhs, <16 x i32> %rhs)
  ret <16 x i32> %result
}


define i64 @v_uaddsat_i64(i64 %lhs, i64 %rhs) {
; GFX6-LABEL: v_uaddsat_i64:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v0, v2
; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, v1, v3, vcc
; GFX6-NEXT:    v_cmp_lt_u64_e32 vcc, v[2:3], v[0:1]
; GFX6-NEXT:    v_cndmask_b32_e64 v0, v2, -1, vcc
; GFX6-NEXT:    v_cndmask_b32_e64 v1, v3, -1, vcc
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_uaddsat_i64:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v0, v2
; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, v1, v3, vcc
; GFX8-NEXT:    v_cmp_lt_u64_e32 vcc, v[2:3], v[0:1]
; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, -1, vcc
; GFX8-NEXT:    v_cndmask_b32_e64 v1, v3, -1, vcc
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_uaddsat_i64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v0, v2
; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v1, v3, vcc
; GFX9-NEXT:    v_cmp_lt_u64_e32 vcc, v[2:3], v[0:1]
; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, -1, vcc
; GFX9-NEXT:    v_cndmask_b32_e64 v1, v3, -1, vcc
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_uaddsat_i64:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_add_co_u32 v2, vcc_lo, v0, v2
; GFX10-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, v1, v3, vcc_lo
; GFX10-NEXT:    v_cmp_lt_u64_e32 vcc_lo, v[2:3], v[0:1]
; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, -1, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v3, -1, vcc_lo
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %result = call i64 @llvm.uadd.sat.i64(i64 %lhs, i64 %rhs)
  ret i64 %result
}

declare i8 @llvm.uadd.sat.i8(i8, i8) #0
declare i16 @llvm.uadd.sat.i16(i16, i16) #0
declare <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16>, <2 x i16>) #0
declare <3 x i16> @llvm.uadd.sat.v3i16(<3 x i16>, <3 x i16>) #0
declare <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16>, <4 x i16>) #0
declare i32 @llvm.uadd.sat.i32(i32, i32) #0
declare <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32>, <2 x i32>) #0
declare <3 x i32> @llvm.uadd.sat.v3i32(<3 x i32>, <3 x i32>) #0
declare <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32>, <4 x i32>) #0
declare <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32>, <8 x i32>) #0
declare <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32>, <16 x i32>) #0
declare i64 @llvm.uadd.sat.i64(i64, i64) #0

attributes #0 = { nounwind readnone speculatable willreturn }