; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -slp-vectorizer -instcombine %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -slp-vectorizer -instcombine %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -slp-vectorizer -instcombine %s | FileCheck -check-prefixes=GCN,GFX8 %s

; Exercises the SLP vectorizer (followed by instcombine) on scalarized
; saturating add/sub intrinsic calls for three amdgcn CPUs.
;
; Every test extracts the lanes of two vector arguments, applies the scalar
; intrinsic lane by lane, and rebuilds the result vector with insertelement.
;
; Check prefixes used by the run lines above:
;   GCN  - shared by all three invocations
;   GFX7 - hawaii only
;   GFX8 - fiji and gfx900 (both invocations share this prefix)
;
; The assertions are generated by utils/update_test_checks.py; regenerate them
; with that script instead of editing them by hand.

; i16 pair, unsigned saturating add. The GFX7 checks expect the scalar code to
; be left as is; the GFX8 checks expect one <2 x i16> intrinsic call.
define <2 x i16> @uadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
; GFX7-LABEL: @uadd_sat_v2i16(
; GFX7-NEXT:  bb:
; GFX7-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
; GFX7-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
; GFX7-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
; GFX7-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
; GFX7-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX7-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
; GFX7-NEXT:    [[INS_0:%.*]] = insertelement <2 x i16> undef, i16 [[ADD_0]], i64 0
; GFX7-NEXT:    [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
; GFX7-NEXT:    ret <2 x i16> [[INS_1]]
;
; GFX8-LABEL: @uadd_sat_v2i16(
; GFX8-NEXT:  bb:
; GFX8-NEXT:    [[TMP0:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
; GFX8-NEXT:    ret <2 x i16> [[TMP0]]
;
bb:
  %arg0.0 = extractelement <2 x i16> %arg0, i64 0
  %arg0.1 = extractelement <2 x i16> %arg0, i64 1
  %arg1.0 = extractelement <2 x i16> %arg1, i64 0
  %arg1.1 = extractelement <2 x i16> %arg1, i64 1
  %add.0 = call i16 @llvm.uadd.sat.i16(i16 %arg0.0, i16 %arg1.0)
  %add.1 = call i16 @llvm.uadd.sat.i16(i16 %arg0.1, i16 %arg1.1)
  %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
  %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
  ret <2 x i16> %ins.1
}

; i16 pair, unsigned saturating subtract. Same expectations as the uadd case.
; The results keep the %add.N names even though the operation is a subtract.
define <2 x i16> @usub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
; GFX7-LABEL: @usub_sat_v2i16(
; GFX7-NEXT:  bb:
; GFX7-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
; GFX7-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
; GFX7-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
; GFX7-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
; GFX7-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.usub.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX7-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.usub.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
; GFX7-NEXT:    [[INS_0:%.*]] = insertelement <2 x i16> undef, i16 [[ADD_0]], i64 0
; GFX7-NEXT:    [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
; GFX7-NEXT:    ret <2 x i16> [[INS_1]]
;
; GFX8-LABEL: @usub_sat_v2i16(
; GFX8-NEXT:  bb:
; GFX8-NEXT:    [[TMP0:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
; GFX8-NEXT:    ret <2 x i16> [[TMP0]]
;
bb:
  %arg0.0 = extractelement <2 x i16> %arg0, i64 0
  %arg0.1 = extractelement <2 x i16> %arg0, i64 1
  %arg1.0 = extractelement <2 x i16> %arg1, i64 0
  %arg1.1 = extractelement <2 x i16> %arg1, i64 1
  %add.0 = call i16 @llvm.usub.sat.i16(i16 %arg0.0, i16 %arg1.0)
  %add.1 = call i16 @llvm.usub.sat.i16(i16 %arg0.1, i16 %arg1.1)
  %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
  %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
  ret <2 x i16> %ins.1
}

; i16 pair, signed saturating add. Same expectations as the uadd case.
define <2 x i16> @sadd_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
; GFX7-LABEL: @sadd_sat_v2i16(
; GFX7-NEXT:  bb:
; GFX7-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
; GFX7-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
; GFX7-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
; GFX7-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
; GFX7-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.sadd.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX7-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.sadd.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
; GFX7-NEXT:    [[INS_0:%.*]] = insertelement <2 x i16> undef, i16 [[ADD_0]], i64 0
; GFX7-NEXT:    [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
; GFX7-NEXT:    ret <2 x i16> [[INS_1]]
;
; GFX8-LABEL: @sadd_sat_v2i16(
; GFX8-NEXT:  bb:
; GFX8-NEXT:    [[TMP0:%.*]] = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
; GFX8-NEXT:    ret <2 x i16> [[TMP0]]
;
bb:
  %arg0.0 = extractelement <2 x i16> %arg0, i64 0
  %arg0.1 = extractelement <2 x i16> %arg0, i64 1
  %arg1.0 = extractelement <2 x i16> %arg1, i64 0
  %arg1.1 = extractelement <2 x i16> %arg1, i64 1
  %add.0 = call i16 @llvm.sadd.sat.i16(i16 %arg0.0, i16 %arg1.0)
  %add.1 = call i16 @llvm.sadd.sat.i16(i16 %arg0.1, i16 %arg1.1)
  %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
  %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
  ret <2 x i16> %ins.1
}

; i16 pair, signed saturating subtract. Same expectations as the uadd case.
; The results keep the %add.N names even though the operation is a subtract.
define <2 x i16> @ssub_sat_v2i16(<2 x i16> %arg0, <2 x i16> %arg1) {
; GFX7-LABEL: @ssub_sat_v2i16(
; GFX7-NEXT:  bb:
; GFX7-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i16> [[ARG0:%.*]], i64 0
; GFX7-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i16> [[ARG0]], i64 1
; GFX7-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i16> [[ARG1:%.*]], i64 0
; GFX7-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i16> [[ARG1]], i64 1
; GFX7-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.ssub.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX7-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.ssub.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
; GFX7-NEXT:    [[INS_0:%.*]] = insertelement <2 x i16> undef, i16 [[ADD_0]], i64 0
; GFX7-NEXT:    [[INS_1:%.*]] = insertelement <2 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
; GFX7-NEXT:    ret <2 x i16> [[INS_1]]
;
; GFX8-LABEL: @ssub_sat_v2i16(
; GFX8-NEXT:  bb:
; GFX8-NEXT:    [[TMP0:%.*]] = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> [[ARG0:%.*]], <2 x i16> [[ARG1:%.*]])
; GFX8-NEXT:    ret <2 x i16> [[TMP0]]
;
bb:
  %arg0.0 = extractelement <2 x i16> %arg0, i64 0
  %arg0.1 = extractelement <2 x i16> %arg0, i64 1
  %arg1.0 = extractelement <2 x i16> %arg1, i64 0
  %arg1.1 = extractelement <2 x i16> %arg1, i64 1
  %add.0 = call i16 @llvm.ssub.sat.i16(i16 %arg0.0, i16 %arg1.0)
  %add.1 = call i16 @llvm.ssub.sat.i16(i16 %arg0.1, i16 %arg1.1)
  %ins.0 = insertelement <2 x i16> undef, i16 %add.0, i64 0
  %ins.1 = insertelement <2 x i16> %ins.0, i16 %add.1, i64 1
  ret <2 x i16> %ins.1
}

; i32 pair, unsigned saturating add. The shared GCN checks expect the scalar
; code to be left as is for all three CPUs.
define <2 x i32> @uadd_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
; GCN-LABEL: @uadd_sat_v2i32(
; GCN-NEXT:  bb:
; GCN-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
; GCN-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
; GCN-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
; GCN-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
; GCN-NEXT:    [[ADD_0:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
; GCN-NEXT:    [[ADD_1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
; GCN-NEXT:    [[INS_0:%.*]] = insertelement <2 x i32> undef, i32 [[ADD_0]], i64 0
; GCN-NEXT:    [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
; GCN-NEXT:    ret <2 x i32> [[INS_1]]
;
bb:
  %arg0.0 = extractelement <2 x i32> %arg0, i64 0
  %arg0.1 = extractelement <2 x i32> %arg0, i64 1
  %arg1.0 = extractelement <2 x i32> %arg1, i64 0
  %arg1.1 = extractelement <2 x i32> %arg1, i64 1
  %add.0 = call i32 @llvm.uadd.sat.i32(i32 %arg0.0, i32 %arg1.0)
  %add.1 = call i32 @llvm.uadd.sat.i32(i32 %arg0.1, i32 %arg1.1)
  %ins.0 = insertelement <2 x i32> undef, i32 %add.0, i64 0
  %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
  ret <2 x i32> %ins.1
}

; i32 pair, unsigned saturating subtract. Expected to stay scalar everywhere.
define <2 x i32> @usub_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
; GCN-LABEL: @usub_sat_v2i32(
; GCN-NEXT:  bb:
; GCN-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
; GCN-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
; GCN-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
; GCN-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
; GCN-NEXT:    [[ADD_0:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
; GCN-NEXT:    [[ADD_1:%.*]] = call i32 @llvm.usub.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
; GCN-NEXT:    [[INS_0:%.*]] = insertelement <2 x i32> undef, i32 [[ADD_0]], i64 0
; GCN-NEXT:    [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
; GCN-NEXT:    ret <2 x i32> [[INS_1]]
;
bb:
  %arg0.0 = extractelement <2 x i32> %arg0, i64 0
  %arg0.1 = extractelement <2 x i32> %arg0, i64 1
  %arg1.0 = extractelement <2 x i32> %arg1, i64 0
  %arg1.1 = extractelement <2 x i32> %arg1, i64 1
  %add.0 = call i32 @llvm.usub.sat.i32(i32 %arg0.0, i32 %arg1.0)
  %add.1 = call i32 @llvm.usub.sat.i32(i32 %arg0.1, i32 %arg1.1)
  %ins.0 = insertelement <2 x i32> undef, i32 %add.0, i64 0
  %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
  ret <2 x i32> %ins.1
}

; i32 pair, signed saturating add. Expected to stay scalar everywhere.
define <2 x i32> @sadd_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
; GCN-LABEL: @sadd_sat_v2i32(
; GCN-NEXT:  bb:
; GCN-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
; GCN-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
; GCN-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
; GCN-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
; GCN-NEXT:    [[ADD_0:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
; GCN-NEXT:    [[ADD_1:%.*]] = call i32 @llvm.sadd.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
; GCN-NEXT:    [[INS_0:%.*]] = insertelement <2 x i32> undef, i32 [[ADD_0]], i64 0
; GCN-NEXT:    [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
; GCN-NEXT:    ret <2 x i32> [[INS_1]]
;
bb:
  %arg0.0 = extractelement <2 x i32> %arg0, i64 0
  %arg0.1 = extractelement <2 x i32> %arg0, i64 1
  %arg1.0 = extractelement <2 x i32> %arg1, i64 0
  %arg1.1 = extractelement <2 x i32> %arg1, i64 1
  %add.0 = call i32 @llvm.sadd.sat.i32(i32 %arg0.0, i32 %arg1.0)
  %add.1 = call i32 @llvm.sadd.sat.i32(i32 %arg0.1, i32 %arg1.1)
  %ins.0 = insertelement <2 x i32> undef, i32 %add.0, i64 0
  %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
  ret <2 x i32> %ins.1
}

; i32 pair, signed saturating subtract. Expected to stay scalar everywhere.
define <2 x i32> @ssub_sat_v2i32(<2 x i32> %arg0, <2 x i32> %arg1) {
; GCN-LABEL: @ssub_sat_v2i32(
; GCN-NEXT:  bb:
; GCN-NEXT:    [[ARG0_0:%.*]] = extractelement <2 x i32> [[ARG0:%.*]], i64 0
; GCN-NEXT:    [[ARG0_1:%.*]] = extractelement <2 x i32> [[ARG0]], i64 1
; GCN-NEXT:    [[ARG1_0:%.*]] = extractelement <2 x i32> [[ARG1:%.*]], i64 0
; GCN-NEXT:    [[ARG1_1:%.*]] = extractelement <2 x i32> [[ARG1]], i64 1
; GCN-NEXT:    [[ADD_0:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[ARG0_0]], i32 [[ARG1_0]])
; GCN-NEXT:    [[ADD_1:%.*]] = call i32 @llvm.ssub.sat.i32(i32 [[ARG0_1]], i32 [[ARG1_1]])
; GCN-NEXT:    [[INS_0:%.*]] = insertelement <2 x i32> undef, i32 [[ADD_0]], i64 0
; GCN-NEXT:    [[INS_1:%.*]] = insertelement <2 x i32> [[INS_0]], i32 [[ADD_1]], i64 1
; GCN-NEXT:    ret <2 x i32> [[INS_1]]
;
bb:
  %arg0.0 = extractelement <2 x i32> %arg0, i64 0
  %arg0.1 = extractelement <2 x i32> %arg0, i64 1
  %arg1.0 = extractelement <2 x i32> %arg1, i64 0
  %arg1.1 = extractelement <2 x i32> %arg1, i64 1
  %add.0 = call i32 @llvm.ssub.sat.i32(i32 %arg0.0, i32 %arg1.0)
  %add.1 = call i32 @llvm.ssub.sat.i32(i32 %arg0.1, i32 %arg1.1)
  %ins.0 = insertelement <2 x i32> undef, i32 %add.0, i64 0
  %ins.1 = insertelement <2 x i32> %ins.0, i32 %add.1, i64 1
  ret <2 x i32> %ins.1
}

; Three i16 lanes, unsigned saturating add. The GFX7 checks expect all three
; lanes to stay scalar; the GFX8 checks expect lanes 0-1 to be combined into a
; <2 x i16> call while lane 2 keeps its scalar call.
define <3 x i16> @uadd_sat_v3i16(<3 x i16> %arg0, <3 x i16> %arg1) {
; GFX7-LABEL: @uadd_sat_v3i16(
; GFX7-NEXT:  bb:
; GFX7-NEXT:    [[ARG0_0:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 0
; GFX7-NEXT:    [[ARG0_1:%.*]] = extractelement <3 x i16> [[ARG0]], i64 1
; GFX7-NEXT:    [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0]], i64 2
; GFX7-NEXT:    [[ARG1_0:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 0
; GFX7-NEXT:    [[ARG1_1:%.*]] = extractelement <3 x i16> [[ARG1]], i64 1
; GFX7-NEXT:    [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1]], i64 2
; GFX7-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX7-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
; GFX7-NEXT:    [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
; GFX7-NEXT:    [[INS_0:%.*]] = insertelement <3 x i16> undef, i16 [[ADD_0]], i64 0
; GFX7-NEXT:    [[INS_1:%.*]] = insertelement <3 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
; GFX7-NEXT:    [[INS_2:%.*]] = insertelement <3 x i16> [[INS_1]], i16 [[ADD_2]], i64 2
; GFX7-NEXT:    ret <3 x i16> [[INS_2]]
;
; GFX8-LABEL: @uadd_sat_v3i16(
; GFX8-NEXT:  bb:
; GFX8-NEXT:    [[ARG0_2:%.*]] = extractelement <3 x i16> [[ARG0:%.*]], i64 2
; GFX8-NEXT:    [[ARG1_2:%.*]] = extractelement <3 x i16> [[ARG1:%.*]], i64 2
; GFX8-NEXT:    [[TMP0:%.*]] = shufflevector <3 x i16> [[ARG0]], <3 x i16> undef, <2 x i32> <i32 0, i32 1>
; GFX8-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i16> [[ARG1]], <3 x i16> undef, <2 x i32> <i32 0, i32 1>
; GFX8-NEXT:    [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
; GFX8-NEXT:    [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
; GFX8-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <3 x i32> <i32 0, i32 1, i32 undef>
; GFX8-NEXT:    [[INS_2:%.*]] = insertelement <3 x i16> [[TMP3]], i16 [[ADD_2]], i64 2
; GFX8-NEXT:    ret <3 x i16> [[INS_2]]
;
bb:
  %arg0.0 = extractelement <3 x i16> %arg0, i64 0
  %arg0.1 = extractelement <3 x i16> %arg0, i64 1
  %arg0.2 = extractelement <3 x i16> %arg0, i64 2
  %arg1.0 = extractelement <3 x i16> %arg1, i64 0
  %arg1.1 = extractelement <3 x i16> %arg1, i64 1
  %arg1.2 = extractelement <3 x i16> %arg1, i64 2
  %add.0 = call i16 @llvm.uadd.sat.i16(i16 %arg0.0, i16 %arg1.0)
  %add.1 = call i16 @llvm.uadd.sat.i16(i16 %arg0.1, i16 %arg1.1)
  %add.2 = call i16 @llvm.uadd.sat.i16(i16 %arg0.2, i16 %arg1.2)
  %ins.0 = insertelement <3 x i16> undef, i16 %add.0, i64 0
  %ins.1 = insertelement <3 x i16> %ins.0, i16 %add.1, i64 1
  %ins.2 = insertelement <3 x i16> %ins.1, i16 %add.2, i64 2
  ret <3 x i16> %ins.2
}

; Four i16 lanes, unsigned saturating add. The GFX7 checks expect all four
; lanes to stay scalar; the GFX8 checks expect two <2 x i16> calls (lanes 0-1
; and lanes 2-3) whose results are joined by a shufflevector.
define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) {
; GFX7-LABEL: @uadd_sat_v4i16(
; GFX7-NEXT:  bb:
; GFX7-NEXT:    [[ARG0_0:%.*]] = extractelement <4 x i16> [[ARG0:%.*]], i64 0
; GFX7-NEXT:    [[ARG0_1:%.*]] = extractelement <4 x i16> [[ARG0]], i64 1
; GFX7-NEXT:    [[ARG0_2:%.*]] = extractelement <4 x i16> [[ARG0]], i64 2
; GFX7-NEXT:    [[ARG0_3:%.*]] = extractelement <4 x i16> [[ARG0]], i64 3
; GFX7-NEXT:    [[ARG1_0:%.*]] = extractelement <4 x i16> [[ARG1:%.*]], i64 0
; GFX7-NEXT:    [[ARG1_1:%.*]] = extractelement <4 x i16> [[ARG1]], i64 1
; GFX7-NEXT:    [[ARG1_2:%.*]] = extractelement <4 x i16> [[ARG1]], i64 2
; GFX7-NEXT:    [[ARG1_3:%.*]] = extractelement <4 x i16> [[ARG1]], i64 3
; GFX7-NEXT:    [[ADD_0:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_0]], i16 [[ARG1_0]])
; GFX7-NEXT:    [[ADD_1:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
; GFX7-NEXT:    [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
; GFX7-NEXT:    [[ADD_3:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_3]], i16 [[ARG1_3]])
; GFX7-NEXT:    [[INS_0:%.*]] = insertelement <4 x i16> undef, i16 [[ADD_0]], i64 0
; GFX7-NEXT:    [[INS_1:%.*]] = insertelement <4 x i16> [[INS_0]], i16 [[ADD_1]], i64 1
; GFX7-NEXT:    [[INS_2:%.*]] = insertelement <4 x i16> [[INS_1]], i16 [[ADD_2]], i64 2
; GFX7-NEXT:    [[INS_3:%.*]] = insertelement <4 x i16> [[INS_2]], i16 [[ADD_3]], i64 3
; GFX7-NEXT:    ret <4 x i16> [[INS_3]]
;
; GFX8-LABEL: @uadd_sat_v4i16(
; GFX8-NEXT:  bb:
; GFX8-NEXT:    [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> undef, <2 x i32> <i32 0, i32 1>
; GFX8-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> undef, <2 x i32> <i32 0, i32 1>
; GFX8-NEXT:    [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
; GFX8-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> undef, <2 x i32> <i32 2, i32 3>
; GFX8-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> undef, <2 x i32> <i32 2, i32 3>
; GFX8-NEXT:    [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]])
; GFX8-NEXT:    [[INS_31:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; GFX8-NEXT:    ret <4 x i16> [[INS_31]]
;
bb:
  %arg0.0 = extractelement <4 x i16> %arg0, i64 0
  %arg0.1 = extractelement <4 x i16> %arg0, i64 1
  %arg0.2 = extractelement <4 x i16> %arg0, i64 2
  %arg0.3 = extractelement <4 x i16> %arg0, i64 3
  %arg1.0 = extractelement <4 x i16> %arg1, i64 0
  %arg1.1 = extractelement <4 x i16> %arg1, i64 1
  %arg1.2 = extractelement <4 x i16> %arg1, i64 2
  %arg1.3 = extractelement <4 x i16> %arg1, i64 3
  %add.0 = call i16 @llvm.uadd.sat.i16(i16 %arg0.0, i16 %arg1.0)
  %add.1 = call i16 @llvm.uadd.sat.i16(i16 %arg0.1, i16 %arg1.1)
  %add.2 = call i16 @llvm.uadd.sat.i16(i16 %arg0.2, i16 %arg1.2)
  %add.3 = call i16 @llvm.uadd.sat.i16(i16 %arg0.3, i16 %arg1.3)
  %ins.0 = insertelement <4 x i16> undef, i16 %add.0, i64 0
  %ins.1 = insertelement <4 x i16> %ins.0, i16 %add.1, i64 1
  %ins.2 = insertelement <4 x i16> %ins.1, i16 %add.2, i64 2
  %ins.3 = insertelement <4 x i16> %ins.2, i16 %add.3, i64 3
  ret <4 x i16> %ins.3
}

; Scalar saturating add/sub intrinsics called by the tests above. The
; <2 x i16> variants named in the GFX8 checks are not declared in this file.
declare i16 @llvm.uadd.sat.i16(i16, i16) #0
declare i16 @llvm.usub.sat.i16(i16, i16) #0
declare i16 @llvm.sadd.sat.i16(i16, i16) #0
declare i16 @llvm.ssub.sat.i16(i16, i16) #0

declare i32 @llvm.uadd.sat.i32(i32, i32) #0
declare i32 @llvm.usub.sat.i32(i32, i32) #0
declare i32 @llvm.sadd.sat.i32(i32, i32) #0
declare i32 @llvm.ssub.sat.i32(i32, i32) #0

; Attribute group attached to every intrinsic declaration above.
attributes #0 = { nounwind readnone speculatable willreturn }