; Test overview: every function below is run through `opt -slp-vectorizer` twice (see the RUN lines): once with -slp-threshold=-10000, checked under the default CHECK prefix, and once with -slp-threshold=0, checked under the ZEROTHRESH prefix.
; The CHECK/ZEROTHRESH lines are autogenerated (see the NOTE line); regenerate them with utils/update_test_checks.py instead of editing them by hand.
; @simple_select: four scalar icmp-ne/select lanes built from elements extracted from %a, %b and %c, re-inserted in lane order 0..3; both prefixes expect one <4 x i32> icmp and one <4 x float> select followed by per-lane extract/insert pairs.
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; RUN: opt -S -slp-vectorizer -slp-threshold=-10000 < %s | FileCheck %s 3; RUN: opt -S -slp-vectorizer -slp-threshold=0 < %s | FileCheck %s --check-prefix=ZEROTHRESH 4target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-n8:16:32:64-S128" 5 6target triple = "x86_64-apple-macosx10.8.0" 7 8define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 { 9; CHECK-LABEL: @simple_select( 10; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer 11; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b 12; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 13; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0 14; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1 15; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[TMP4]], i32 1 16; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 2 17; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x float> [[RB]], float [[TMP5]], i32 2 18; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3 19; CHECK-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP6]], i32 3 20; CHECK-NEXT: ret <4 x float> [[RD]] 21; 22; ZEROTHRESH-LABEL: @simple_select( 23; ZEROTHRESH-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer 24; ZEROTHRESH-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b 25; ZEROTHRESH-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 26; ZEROTHRESH-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0 27; ZEROTHRESH-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1 28; ZEROTHRESH-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[TMP4]], i32 1 29; ZEROTHRESH-NEXT: [[TMP5:%.*]] = extractelement <4 x 
float> [[TMP2]], i32 2 30; ZEROTHRESH-NEXT: [[RC:%.*]] = insertelement <4 x float> [[RB]], float [[TMP5]], i32 2 31; ZEROTHRESH-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3 32; ZEROTHRESH-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP6]], i32 3 33; ZEROTHRESH-NEXT: ret <4 x float> [[RD]] 34; 35 %c0 = extractelement <4 x i32> %c, i32 0 36 %c1 = extractelement <4 x i32> %c, i32 1 37 %c2 = extractelement <4 x i32> %c, i32 2 38 %c3 = extractelement <4 x i32> %c, i32 3 39 %a0 = extractelement <4 x float> %a, i32 0 40 %a1 = extractelement <4 x float> %a, i32 1 41 %a2 = extractelement <4 x float> %a, i32 2 42 %a3 = extractelement <4 x float> %a, i32 3 43 %b0 = extractelement <4 x float> %b, i32 0 44 %b1 = extractelement <4 x float> %b, i32 1 45 %b2 = extractelement <4 x float> %b, i32 2 46 %b3 = extractelement <4 x float> %b, i32 3 47 %cmp0 = icmp ne i32 %c0, 0 48 %cmp1 = icmp ne i32 %c1, 0 49 %cmp2 = icmp ne i32 %c2, 0 50 %cmp3 = icmp ne i32 %c3, 0 51 %s0 = select i1 %cmp0, float %a0, float %b0 52 %s1 = select i1 %cmp1, float %a1, float %b1 53 %s2 = select i1 %cmp2, float %a2, float %b2 54 %s3 = select i1 %cmp3, float %a3, float %b3 55 %ra = insertelement <4 x float> undef, float %s0, i32 0 56 %rb = insertelement <4 x float> %ra, float %s1, i32 1 57 %rc = insertelement <4 x float> %rb, float %s2, i32 2 58 %rd = insertelement <4 x float> %rc, float %s3, i32 3 59 ret <4 x float> %rd 60} 61 62declare void @llvm.assume(i1) nounwind 63 64; This entire tree is ephemeral, don't vectorize any of it. 
; @simple_select_eph: the select tree is consumed only by the fadd/fcmp chain feeding @llvm.assume and the function returns undef; both prefixes expect the scalar IR to be left as written.
; @simple_select_insert_out_of_order: same lanes as @simple_select but inserted at indices 2, 1, 0, 3; both prefixes still expect the <4 x i32> icmp and <4 x float> select.
; @simple_select_users: the final vector %rd is both passed to @v4f32_user and returned; both prefixes expect the vectorized form.
; @simple_select_no_users: %rb is never used because %rc restarts the insert chain from undef; CHECK expects only lanes 2-3 to become <2 x ...> icmp/select, ZEROTHRESH expects all-scalar code.
; @reconstruct: extract/insert round-trip of %c with no arithmetic; both prefixes expect a per-lane extract/insert chain and no new vector operations.
; @simple_select_v2: 2-lane variant; CHECK expects <2 x ...> icmp/select, ZEROTHRESH expects the scalar code to remain.
; @simple_select_partial_vector: input already uses <2 x ...> icmp/select inside a <4 x float> insert chain; both prefixes expect the same instruction sequence back.
65define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 { 66; CHECK-LABEL: @simple_select_eph( 67; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0 68; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1 69; CHECK-NEXT: [[C2:%.*]] = extractelement <4 x i32> %c, i32 2 70; CHECK-NEXT: [[C3:%.*]] = extractelement <4 x i32> %c, i32 3 71; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0 72; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1 73; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x float> %a, i32 2 74; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x float> %a, i32 3 75; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> %b, i32 0 76; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1 77; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> %b, i32 2 78; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> %b, i32 3 79; CHECK-NEXT: [[CMP0:%.*]] = icmp ne i32 [[C0]], 0 80; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[C1]], 0 81; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i32 [[C2]], 0 82; CHECK-NEXT: [[CMP3:%.*]] = icmp ne i32 [[C3]], 0 83; CHECK-NEXT: [[S0:%.*]] = select i1 [[CMP0]], float [[A0]], float [[B0]] 84; CHECK-NEXT: [[S1:%.*]] = select i1 [[CMP1]], float [[A1]], float [[B1]] 85; CHECK-NEXT: [[S2:%.*]] = select i1 [[CMP2]], float [[A2]], float [[B2]] 86; CHECK-NEXT: [[S3:%.*]] = select i1 [[CMP3]], float [[A3]], float [[B3]] 87; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[S0]], i32 0 88; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[S1]], i32 1 89; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x float> [[RB]], float [[S2]], i32 2 90; CHECK-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[S3]], i32 3 91; CHECK-NEXT: [[Q0:%.*]] = extractelement <4 x float> [[RD]], i32 0 92; CHECK-NEXT: [[Q1:%.*]] = extractelement <4 x float> [[RD]], i32 1 93; CHECK-NEXT: [[Q2:%.*]] = extractelement <4 x float> [[RD]], i32 2 94; CHECK-NEXT: [[Q3:%.*]] = 
extractelement <4 x float> [[RD]], i32 3 95; CHECK-NEXT: [[Q4:%.*]] = fadd float [[Q0]], [[Q1]] 96; CHECK-NEXT: [[Q5:%.*]] = fadd float [[Q2]], [[Q3]] 97; CHECK-NEXT: [[Q6:%.*]] = fadd float [[Q4]], [[Q5]] 98; CHECK-NEXT: [[QI:%.*]] = fcmp olt float [[Q6]], [[Q5]] 99; CHECK-NEXT: call void @llvm.assume(i1 [[QI]]) 100; CHECK-NEXT: ret <4 x float> undef 101; 102; ZEROTHRESH-LABEL: @simple_select_eph( 103; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0 104; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1 105; ZEROTHRESH-NEXT: [[C2:%.*]] = extractelement <4 x i32> %c, i32 2 106; ZEROTHRESH-NEXT: [[C3:%.*]] = extractelement <4 x i32> %c, i32 3 107; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0 108; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1 109; ZEROTHRESH-NEXT: [[A2:%.*]] = extractelement <4 x float> %a, i32 2 110; ZEROTHRESH-NEXT: [[A3:%.*]] = extractelement <4 x float> %a, i32 3 111; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <4 x float> %b, i32 0 112; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1 113; ZEROTHRESH-NEXT: [[B2:%.*]] = extractelement <4 x float> %b, i32 2 114; ZEROTHRESH-NEXT: [[B3:%.*]] = extractelement <4 x float> %b, i32 3 115; ZEROTHRESH-NEXT: [[CMP0:%.*]] = icmp ne i32 [[C0]], 0 116; ZEROTHRESH-NEXT: [[CMP1:%.*]] = icmp ne i32 [[C1]], 0 117; ZEROTHRESH-NEXT: [[CMP2:%.*]] = icmp ne i32 [[C2]], 0 118; ZEROTHRESH-NEXT: [[CMP3:%.*]] = icmp ne i32 [[C3]], 0 119; ZEROTHRESH-NEXT: [[S0:%.*]] = select i1 [[CMP0]], float [[A0]], float [[B0]] 120; ZEROTHRESH-NEXT: [[S1:%.*]] = select i1 [[CMP1]], float [[A1]], float [[B1]] 121; ZEROTHRESH-NEXT: [[S2:%.*]] = select i1 [[CMP2]], float [[A2]], float [[B2]] 122; ZEROTHRESH-NEXT: [[S3:%.*]] = select i1 [[CMP3]], float [[A3]], float [[B3]] 123; ZEROTHRESH-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[S0]], i32 0 124; ZEROTHRESH-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[S1]], i32 1 
125; ZEROTHRESH-NEXT: [[RC:%.*]] = insertelement <4 x float> [[RB]], float [[S2]], i32 2 126; ZEROTHRESH-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[S3]], i32 3 127; ZEROTHRESH-NEXT: [[Q0:%.*]] = extractelement <4 x float> [[RD]], i32 0 128; ZEROTHRESH-NEXT: [[Q1:%.*]] = extractelement <4 x float> [[RD]], i32 1 129; ZEROTHRESH-NEXT: [[Q2:%.*]] = extractelement <4 x float> [[RD]], i32 2 130; ZEROTHRESH-NEXT: [[Q3:%.*]] = extractelement <4 x float> [[RD]], i32 3 131; ZEROTHRESH-NEXT: [[Q4:%.*]] = fadd float [[Q0]], [[Q1]] 132; ZEROTHRESH-NEXT: [[Q5:%.*]] = fadd float [[Q2]], [[Q3]] 133; ZEROTHRESH-NEXT: [[Q6:%.*]] = fadd float [[Q4]], [[Q5]] 134; ZEROTHRESH-NEXT: [[QI:%.*]] = fcmp olt float [[Q6]], [[Q5]] 135; ZEROTHRESH-NEXT: call void @llvm.assume(i1 [[QI]]) 136; ZEROTHRESH-NEXT: ret <4 x float> undef 137; 138 %c0 = extractelement <4 x i32> %c, i32 0 139 %c1 = extractelement <4 x i32> %c, i32 1 140 %c2 = extractelement <4 x i32> %c, i32 2 141 %c3 = extractelement <4 x i32> %c, i32 3 142 %a0 = extractelement <4 x float> %a, i32 0 143 %a1 = extractelement <4 x float> %a, i32 1 144 %a2 = extractelement <4 x float> %a, i32 2 145 %a3 = extractelement <4 x float> %a, i32 3 146 %b0 = extractelement <4 x float> %b, i32 0 147 %b1 = extractelement <4 x float> %b, i32 1 148 %b2 = extractelement <4 x float> %b, i32 2 149 %b3 = extractelement <4 x float> %b, i32 3 150 %cmp0 = icmp ne i32 %c0, 0 151 %cmp1 = icmp ne i32 %c1, 0 152 %cmp2 = icmp ne i32 %c2, 0 153 %cmp3 = icmp ne i32 %c3, 0 154 %s0 = select i1 %cmp0, float %a0, float %b0 155 %s1 = select i1 %cmp1, float %a1, float %b1 156 %s2 = select i1 %cmp2, float %a2, float %b2 157 %s3 = select i1 %cmp3, float %a3, float %b3 158 %ra = insertelement <4 x float> undef, float %s0, i32 0 159 %rb = insertelement <4 x float> %ra, float %s1, i32 1 160 %rc = insertelement <4 x float> %rb, float %s2, i32 2 161 %rd = insertelement <4 x float> %rc, float %s3, i32 3 162 %q0 = extractelement <4 x float> %rd, i32 0 163 %q1 = 
extractelement <4 x float> %rd, i32 1 164 %q2 = extractelement <4 x float> %rd, i32 2 165 %q3 = extractelement <4 x float> %rd, i32 3 166 %q4 = fadd float %q0, %q1 167 %q5 = fadd float %q2, %q3 168 %q6 = fadd float %q4, %q5 169 %qi = fcmp olt float %q6, %q5 170 call void @llvm.assume(i1 %qi) 171 ret <4 x float> undef 172} 173 174; Insert in an order different from the vector indices to make sure it 175; doesn't matter 176define <4 x float> @simple_select_insert_out_of_order(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 { 177; CHECK-LABEL: @simple_select_insert_out_of_order( 178; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer 179; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b 180; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 181; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 2 182; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1 183; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[TMP4]], i32 1 184; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 2 185; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x float> [[RB]], float [[TMP5]], i32 0 186; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3 187; CHECK-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP6]], i32 3 188; CHECK-NEXT: ret <4 x float> [[RD]] 189; 190; ZEROTHRESH-LABEL: @simple_select_insert_out_of_order( 191; ZEROTHRESH-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer 192; ZEROTHRESH-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b 193; ZEROTHRESH-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 194; ZEROTHRESH-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 2 195; ZEROTHRESH-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1 196; ZEROTHRESH-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[TMP4]], i32 1 
197; ZEROTHRESH-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 2 198; ZEROTHRESH-NEXT: [[RC:%.*]] = insertelement <4 x float> [[RB]], float [[TMP5]], i32 0 199; ZEROTHRESH-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3 200; ZEROTHRESH-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP6]], i32 3 201; ZEROTHRESH-NEXT: ret <4 x float> [[RD]] 202; 203 %c0 = extractelement <4 x i32> %c, i32 0 204 %c1 = extractelement <4 x i32> %c, i32 1 205 %c2 = extractelement <4 x i32> %c, i32 2 206 %c3 = extractelement <4 x i32> %c, i32 3 207 %a0 = extractelement <4 x float> %a, i32 0 208 %a1 = extractelement <4 x float> %a, i32 1 209 %a2 = extractelement <4 x float> %a, i32 2 210 %a3 = extractelement <4 x float> %a, i32 3 211 %b0 = extractelement <4 x float> %b, i32 0 212 %b1 = extractelement <4 x float> %b, i32 1 213 %b2 = extractelement <4 x float> %b, i32 2 214 %b3 = extractelement <4 x float> %b, i32 3 215 %cmp0 = icmp ne i32 %c0, 0 216 %cmp1 = icmp ne i32 %c1, 0 217 %cmp2 = icmp ne i32 %c2, 0 218 %cmp3 = icmp ne i32 %c3, 0 219 %s0 = select i1 %cmp0, float %a0, float %b0 220 %s1 = select i1 %cmp1, float %a1, float %b1 221 %s2 = select i1 %cmp2, float %a2, float %b2 222 %s3 = select i1 %cmp3, float %a3, float %b3 223 %ra = insertelement <4 x float> undef, float %s0, i32 2 224 %rb = insertelement <4 x float> %ra, float %s1, i32 1 225 %rc = insertelement <4 x float> %rb, float %s2, i32 0 226 %rd = insertelement <4 x float> %rc, float %s3, i32 3 227 ret <4 x float> %rd 228} 229 230declare void @v4f32_user(<4 x float>) #0 231declare void @f32_user(float) #0 232 233; Multiple users of the final constructed vector 234define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 { 235; CHECK-LABEL: @simple_select_users( 236; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer 237; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b 238; CHECK-NEXT: [[TMP3:%.*]] = 
extractelement <4 x float> [[TMP2]], i32 0 239; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0 240; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1 241; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[TMP4]], i32 1 242; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 2 243; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x float> [[RB]], float [[TMP5]], i32 2 244; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3 245; CHECK-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP6]], i32 3 246; CHECK-NEXT: call void @v4f32_user(<4 x float> [[RD]]) #0 247; CHECK-NEXT: ret <4 x float> [[RD]] 248; 249; ZEROTHRESH-LABEL: @simple_select_users( 250; ZEROTHRESH-NEXT: [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer 251; ZEROTHRESH-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b 252; ZEROTHRESH-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0 253; ZEROTHRESH-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0 254; ZEROTHRESH-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1 255; ZEROTHRESH-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[TMP4]], i32 1 256; ZEROTHRESH-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 2 257; ZEROTHRESH-NEXT: [[RC:%.*]] = insertelement <4 x float> [[RB]], float [[TMP5]], i32 2 258; ZEROTHRESH-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3 259; ZEROTHRESH-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP6]], i32 3 260; ZEROTHRESH-NEXT: call void @v4f32_user(<4 x float> [[RD]]) #0 261; ZEROTHRESH-NEXT: ret <4 x float> [[RD]] 262; 263 %c0 = extractelement <4 x i32> %c, i32 0 264 %c1 = extractelement <4 x i32> %c, i32 1 265 %c2 = extractelement <4 x i32> %c, i32 2 266 %c3 = extractelement <4 x i32> %c, i32 3 267 %a0 = extractelement <4 x float> %a, i32 0 268 %a1 = extractelement <4 x float> %a, 
i32 1 269 %a2 = extractelement <4 x float> %a, i32 2 270 %a3 = extractelement <4 x float> %a, i32 3 271 %b0 = extractelement <4 x float> %b, i32 0 272 %b1 = extractelement <4 x float> %b, i32 1 273 %b2 = extractelement <4 x float> %b, i32 2 274 %b3 = extractelement <4 x float> %b, i32 3 275 %cmp0 = icmp ne i32 %c0, 0 276 %cmp1 = icmp ne i32 %c1, 0 277 %cmp2 = icmp ne i32 %c2, 0 278 %cmp3 = icmp ne i32 %c3, 0 279 %s0 = select i1 %cmp0, float %a0, float %b0 280 %s1 = select i1 %cmp1, float %a1, float %b1 281 %s2 = select i1 %cmp2, float %a2, float %b2 282 %s3 = select i1 %cmp3, float %a3, float %b3 283 %ra = insertelement <4 x float> undef, float %s0, i32 0 284 %rb = insertelement <4 x float> %ra, float %s1, i32 1 285 %rc = insertelement <4 x float> %rb, float %s2, i32 2 286 %rd = insertelement <4 x float> %rc, float %s3, i32 3 287 call void @v4f32_user(<4 x float> %rd) #0 288 ret <4 x float> %rd 289} 290 291; Unused insertelement 292define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 { 293; CHECK-LABEL: @simple_select_no_users( 294; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0 295; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1 296; CHECK-NEXT: [[C2:%.*]] = extractelement <4 x i32> %c, i32 2 297; CHECK-NEXT: [[C3:%.*]] = extractelement <4 x i32> %c, i32 3 298; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0 299; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1 300; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x float> %a, i32 2 301; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x float> %a, i32 3 302; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> %b, i32 0 303; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1 304; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x float> %b, i32 2 305; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x float> %b, i32 3 306; CHECK-NEXT: [[CMP0:%.*]] = icmp ne i32 [[C0]], 0 307; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[C1]], 0 308; 
CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C2]], i32 0 309; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C3]], i32 1 310; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer 311; CHECK-NEXT: [[S0:%.*]] = select i1 [[CMP0]], float [[A0]], float [[B0]] 312; CHECK-NEXT: [[S1:%.*]] = select i1 [[CMP1]], float [[A1]], float [[B1]] 313; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> undef, float [[A2]], i32 0 314; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> [[TMP4]], float [[A3]], i32 1 315; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> undef, float [[B2]], i32 0 316; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[B3]], i32 1 317; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP3]], <2 x float> [[TMP5]], <2 x float> [[TMP7]] 318; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[S0]], i32 0 319; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[S1]], i32 1 320; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0 321; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x float> undef, float [[TMP9]], i32 2 322; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1 323; CHECK-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP10]], i32 3 324; CHECK-NEXT: ret <4 x float> [[RD]] 325; 326; ZEROTHRESH-LABEL: @simple_select_no_users( 327; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0 328; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1 329; ZEROTHRESH-NEXT: [[C2:%.*]] = extractelement <4 x i32> %c, i32 2 330; ZEROTHRESH-NEXT: [[C3:%.*]] = extractelement <4 x i32> %c, i32 3 331; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0 332; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1 333; ZEROTHRESH-NEXT: [[A2:%.*]] = extractelement <4 x float> %a, i32 2 334; ZEROTHRESH-NEXT: [[A3:%.*]] = extractelement <4 x float> %a, i32 3 335; 
ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <4 x float> %b, i32 0 336; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1 337; ZEROTHRESH-NEXT: [[B2:%.*]] = extractelement <4 x float> %b, i32 2 338; ZEROTHRESH-NEXT: [[B3:%.*]] = extractelement <4 x float> %b, i32 3 339; ZEROTHRESH-NEXT: [[CMP0:%.*]] = icmp ne i32 [[C0]], 0 340; ZEROTHRESH-NEXT: [[CMP1:%.*]] = icmp ne i32 [[C1]], 0 341; ZEROTHRESH-NEXT: [[CMP2:%.*]] = icmp ne i32 [[C2]], 0 342; ZEROTHRESH-NEXT: [[CMP3:%.*]] = icmp ne i32 [[C3]], 0 343; ZEROTHRESH-NEXT: [[S0:%.*]] = select i1 [[CMP0]], float [[A0]], float [[B0]] 344; ZEROTHRESH-NEXT: [[S1:%.*]] = select i1 [[CMP1]], float [[A1]], float [[B1]] 345; ZEROTHRESH-NEXT: [[S2:%.*]] = select i1 [[CMP2]], float [[A2]], float [[B2]] 346; ZEROTHRESH-NEXT: [[S3:%.*]] = select i1 [[CMP3]], float [[A3]], float [[B3]] 347; ZEROTHRESH-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[S0]], i32 0 348; ZEROTHRESH-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[S1]], i32 1 349; ZEROTHRESH-NEXT: [[RC:%.*]] = insertelement <4 x float> undef, float [[S2]], i32 2 350; ZEROTHRESH-NEXT: [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[S3]], i32 3 351; ZEROTHRESH-NEXT: ret <4 x float> [[RD]] 352; 353 %c0 = extractelement <4 x i32> %c, i32 0 354 %c1 = extractelement <4 x i32> %c, i32 1 355 %c2 = extractelement <4 x i32> %c, i32 2 356 %c3 = extractelement <4 x i32> %c, i32 3 357 %a0 = extractelement <4 x float> %a, i32 0 358 %a1 = extractelement <4 x float> %a, i32 1 359 %a2 = extractelement <4 x float> %a, i32 2 360 %a3 = extractelement <4 x float> %a, i32 3 361 %b0 = extractelement <4 x float> %b, i32 0 362 %b1 = extractelement <4 x float> %b, i32 1 363 %b2 = extractelement <4 x float> %b, i32 2 364 %b3 = extractelement <4 x float> %b, i32 3 365 %cmp0 = icmp ne i32 %c0, 0 366 %cmp1 = icmp ne i32 %c1, 0 367 %cmp2 = icmp ne i32 %c2, 0 368 %cmp3 = icmp ne i32 %c3, 0 369 %s0 = select i1 %cmp0, float %a0, float %b0 370 %s1 = select i1 
%cmp1, float %a1, float %b1 371 %s2 = select i1 %cmp2, float %a2, float %b2 372 %s3 = select i1 %cmp3, float %a3, float %b3 373 %ra = insertelement <4 x float> undef, float %s0, i32 0 374 %rb = insertelement <4 x float> %ra, float %s1, i32 1 375 %rc = insertelement <4 x float> undef, float %s2, i32 2 376 %rd = insertelement <4 x float> %rc, float %s3, i32 3 377 ret <4 x float> %rd 378} 379 380; Make sure infinite loop doesn't happen which I ran into when trying 381; to do this backwards 382define <4 x i32> @reconstruct(<4 x i32> %c) #0 { 383; CHECK-LABEL: @reconstruct( 384; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> %c, i32 0 385; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i32 0 386; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> %c, i32 1 387; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[TMP2]], i32 1 388; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> %c, i32 2 389; CHECK-NEXT: [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[TMP3]], i32 2 390; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> %c, i32 3 391; CHECK-NEXT: [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[TMP4]], i32 3 392; CHECK-NEXT: ret <4 x i32> [[RD]] 393; 394; ZEROTHRESH-LABEL: @reconstruct( 395; ZEROTHRESH-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> %c, i32 0 396; ZEROTHRESH-NEXT: [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i32 0 397; ZEROTHRESH-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> %c, i32 1 398; ZEROTHRESH-NEXT: [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[TMP2]], i32 1 399; ZEROTHRESH-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> %c, i32 2 400; ZEROTHRESH-NEXT: [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[TMP3]], i32 2 401; ZEROTHRESH-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> %c, i32 3 402; ZEROTHRESH-NEXT: [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[TMP4]], i32 3 403; ZEROTHRESH-NEXT: ret <4 x i32> [[RD]] 404; 405 %c0 = extractelement <4 x i32> %c, i32 0 406 
%c1 = extractelement <4 x i32> %c, i32 1 407 %c2 = extractelement <4 x i32> %c, i32 2 408 %c3 = extractelement <4 x i32> %c, i32 3 409 %ra = insertelement <4 x i32> undef, i32 %c0, i32 0 410 %rb = insertelement <4 x i32> %ra, i32 %c1, i32 1 411 %rc = insertelement <4 x i32> %rb, i32 %c2, i32 2 412 %rd = insertelement <4 x i32> %rc, i32 %c3, i32 3 413 ret <4 x i32> %rd 414} 415 416define <2 x float> @simple_select_v2(<2 x float> %a, <2 x float> %b, <2 x i32> %c) #0 { 417; CHECK-LABEL: @simple_select_v2( 418; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <2 x i32> %c, zeroinitializer 419; CHECK-NEXT: [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x float> %a, <2 x float> %b 420; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0 421; CHECK-NEXT: [[RA:%.*]] = insertelement <2 x float> undef, float [[TMP3]], i32 0 422; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1 423; CHECK-NEXT: [[RB:%.*]] = insertelement <2 x float> [[RA]], float [[TMP4]], i32 1 424; CHECK-NEXT: ret <2 x float> [[RB]] 425; 426; ZEROTHRESH-LABEL: @simple_select_v2( 427; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <2 x i32> %c, i32 0 428; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <2 x i32> %c, i32 1 429; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <2 x float> %a, i32 0 430; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <2 x float> %a, i32 1 431; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <2 x float> %b, i32 0 432; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <2 x float> %b, i32 1 433; ZEROTHRESH-NEXT: [[CMP0:%.*]] = icmp ne i32 [[C0]], 0 434; ZEROTHRESH-NEXT: [[CMP1:%.*]] = icmp ne i32 [[C1]], 0 435; ZEROTHRESH-NEXT: [[S0:%.*]] = select i1 [[CMP0]], float [[A0]], float [[B0]] 436; ZEROTHRESH-NEXT: [[S1:%.*]] = select i1 [[CMP1]], float [[A1]], float [[B1]] 437; ZEROTHRESH-NEXT: [[RA:%.*]] = insertelement <2 x float> undef, float [[S0]], i32 0 438; ZEROTHRESH-NEXT: [[RB:%.*]] = insertelement <2 x float> [[RA]], float [[S1]], i32 1 439; ZEROTHRESH-NEXT: ret <2 x float> 
[[RB]] 440; 441 %c0 = extractelement <2 x i32> %c, i32 0 442 %c1 = extractelement <2 x i32> %c, i32 1 443 %a0 = extractelement <2 x float> %a, i32 0 444 %a1 = extractelement <2 x float> %a, i32 1 445 %b0 = extractelement <2 x float> %b, i32 0 446 %b1 = extractelement <2 x float> %b, i32 1 447 %cmp0 = icmp ne i32 %c0, 0 448 %cmp1 = icmp ne i32 %c1, 0 449 %s0 = select i1 %cmp0, float %a0, float %b0 450 %s1 = select i1 %cmp1, float %a1, float %b1 451 %ra = insertelement <2 x float> undef, float %s0, i32 0 452 %rb = insertelement <2 x float> %ra, float %s1, i32 1 453 ret <2 x float> %rb 454} 455 456; Make sure when we construct partial vectors, we don't keep 457; re-visiting the insertelement chains starting with undef 458; (low cost threshold needed to force this to happen) 459define <4 x float> @simple_select_partial_vector(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 { 460; CHECK-LABEL: @simple_select_partial_vector( 461; CHECK-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0 462; CHECK-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1 463; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0 464; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1 465; CHECK-NEXT: [[B0:%.*]] = extractelement <4 x float> %b, i32 0 466; CHECK-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1 467; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C0]], i32 0 468; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1 469; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer 470; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> undef, float [[A0]], i32 0 471; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> [[TMP4]], float [[A1]], i32 1 472; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> undef, float [[B0]], i32 0 473; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[B1]], i32 1 474; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP3]], <2 x float> [[TMP5]], 
<2 x float> [[TMP7]] 475; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0 476; CHECK-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP9]], i32 0 477; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1 478; CHECK-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[TMP10]], i32 1 479; CHECK-NEXT: ret <4 x float> [[RB]] 480; 481; ZEROTHRESH-LABEL: @simple_select_partial_vector( 482; ZEROTHRESH-NEXT: [[C0:%.*]] = extractelement <4 x i32> %c, i32 0 483; ZEROTHRESH-NEXT: [[C1:%.*]] = extractelement <4 x i32> %c, i32 1 484; ZEROTHRESH-NEXT: [[A0:%.*]] = extractelement <4 x float> %a, i32 0 485; ZEROTHRESH-NEXT: [[A1:%.*]] = extractelement <4 x float> %a, i32 1 486; ZEROTHRESH-NEXT: [[B0:%.*]] = extractelement <4 x float> %b, i32 0 487; ZEROTHRESH-NEXT: [[B1:%.*]] = extractelement <4 x float> %b, i32 1 488; ZEROTHRESH-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C0]], i32 0 489; ZEROTHRESH-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1 490; ZEROTHRESH-NEXT: [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer 491; ZEROTHRESH-NEXT: [[TMP4:%.*]] = insertelement <2 x float> undef, float [[A0]], i32 0 492; ZEROTHRESH-NEXT: [[TMP5:%.*]] = insertelement <2 x float> [[TMP4]], float [[A1]], i32 1 493; ZEROTHRESH-NEXT: [[TMP6:%.*]] = insertelement <2 x float> undef, float [[B0]], i32 0 494; ZEROTHRESH-NEXT: [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[B1]], i32 1 495; ZEROTHRESH-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP3]], <2 x float> [[TMP5]], <2 x float> [[TMP7]] 496; ZEROTHRESH-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0 497; ZEROTHRESH-NEXT: [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP9]], i32 0 498; ZEROTHRESH-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1 499; ZEROTHRESH-NEXT: [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[TMP10]], i32 1 500; ZEROTHRESH-NEXT: ret <4 x float> [[RB]] 501; 502 %c0 
= extractelement <4 x i32> %c, i32 0 503 %c1 = extractelement <4 x i32> %c, i32 1 504 %a0 = extractelement <4 x float> %a, i32 0 505 %a1 = extractelement <4 x float> %a, i32 1 506 %b0 = extractelement <4 x float> %b, i32 0 507 %b1 = extractelement <4 x float> %b, i32 1 508 %1 = insertelement <2 x i32> undef, i32 %c0, i32 0 509 %2 = insertelement <2 x i32> %1, i32 %c1, i32 1 510 %3 = icmp ne <2 x i32> %2, zeroinitializer 511 %4 = insertelement <2 x float> undef, float %a0, i32 0 512 %5 = insertelement <2 x float> %4, float %a1, i32 1 513 %6 = insertelement <2 x float> undef, float %b0, i32 0 514 %7 = insertelement <2 x float> %6, float %b1, i32 1 515 %8 = select <2 x i1> %3, <2 x float> %5, <2 x float> %7 516 %9 = extractelement <2 x float> %8, i32 0 517 %ra = insertelement <4 x float> undef, float %9, i32 0 518 %10 = extractelement <2 x float> %8, i32 1 519 %rb = insertelement <4 x float> %ra, float %10, i32 1 520 ret <4 x float> %rb 521} 522 523; Make sure that vectorization happens even if insertelement operations 524; must be rescheduled. The case here is from compiling Julia. 
define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @reschedule_extract(
; CHECK-NEXT:    [[TMP1:%.*]] = fadd <4 x float> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
; CHECK-NEXT:    [[V0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
; CHECK-NEXT:    [[V1:%.*]] = insertelement <4 x float> [[V0]], float [[TMP3]], i32 1
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
; CHECK-NEXT:    [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[TMP4]], i32 2
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
; CHECK-NEXT:    [[V3:%.*]] = insertelement <4 x float> [[V2]], float [[TMP5]], i32 3
; CHECK-NEXT:    ret <4 x float> [[V3]]
;
; ZEROTHRESH-LABEL: @reschedule_extract(
; ZEROTHRESH-NEXT:    [[TMP1:%.*]] = fadd <4 x float> %a, %b
; ZEROTHRESH-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
; ZEROTHRESH-NEXT:    [[V0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
; ZEROTHRESH-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
; ZEROTHRESH-NEXT:    [[V1:%.*]] = insertelement <4 x float> [[V0]], float [[TMP3]], i32 1
; ZEROTHRESH-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
; ZEROTHRESH-NEXT:    [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[TMP4]], i32 2
; ZEROTHRESH-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
; ZEROTHRESH-NEXT:    [[V3:%.*]] = insertelement <4 x float> [[V2]], float [[TMP5]], i32 3
; ZEROTHRESH-NEXT:    ret <4 x float> [[V3]]
;
  ; Scalar input: each lane is extracted from %a and %b, added, and inserted
  ; into the result before the next lane is touched, so the insertelement
  ; chain is interleaved with the extracts and fadds. Both assertion groups
  ; above expect a single <4 x float> fadd followed by one extract/insert
  ; pair per lane. The instruction order below is the point of the test; do
  ; not reorder it.

  ; Lane 0.
  %a0 = extractelement <4 x float> %a, i32 0
  %b0 = extractelement <4 x float> %b, i32 0
  %c0 = fadd float %a0, %b0
  %v0 = insertelement <4 x float> undef, float %c0, i32 0
  ; Lane 1.
  %a1 = extractelement <4 x float> %a, i32 1
  %b1 = extractelement <4 x float> %b, i32 1
  %c1 = fadd float %a1, %b1
  %v1 = insertelement <4 x float> %v0, float %c1, i32 1
  ; Lane 2.
  %a2 = extractelement <4 x float> %a, i32 2
  %b2 = extractelement <4 x float> %b, i32 2
  %c2 = fadd float %a2, %b2
  %v2 = insertelement <4 x float> %v1, float %c2, i32 2
  ; Lane 3.
  %a3 = extractelement <4 x float> %a, i32 3
  %b3 = extractelement <4 x float> %b, i32 3
  %c3 = fadd float %a3, %b3
  %v3 = insertelement <4 x float> %v2, float %c3, i32 3
  ret <4 x float> %v3
}

; Check that the cost model for vectorization takes credit for
; instructions that are erased.
define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @take_credit(
; CHECK-NEXT:    [[TMP1:%.*]] = fadd <4 x float> %a, %b
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
; CHECK-NEXT:    [[V0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
; CHECK-NEXT:    [[V1:%.*]] = insertelement <4 x float> [[V0]], float [[TMP3]], i32 1
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
; CHECK-NEXT:    [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[TMP4]], i32 2
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
; CHECK-NEXT:    [[V3:%.*]] = insertelement <4 x float> [[V2]], float [[TMP5]], i32 3
; CHECK-NEXT:    ret <4 x float> [[V3]]
;
; ZEROTHRESH-LABEL: @take_credit(
; ZEROTHRESH-NEXT:    [[TMP1:%.*]] = fadd <4 x float> %a, %b
; ZEROTHRESH-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
; ZEROTHRESH-NEXT:    [[V0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
; ZEROTHRESH-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
; ZEROTHRESH-NEXT:    [[V1:%.*]] = insertelement <4 x float> [[V0]], float [[TMP3]], i32 1
; ZEROTHRESH-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
; ZEROTHRESH-NEXT:    [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[TMP4]], i32 2
; ZEROTHRESH-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
; ZEROTHRESH-NEXT:    [[V3:%.*]] = insertelement <4 x float> [[V2]], float [[TMP5]], i32 3
; ZEROTHRESH-NEXT:    ret <4 x float> [[V3]]
;
  ; Same lane-wise computation as @reschedule_extract, but here all four
  ; scalar fadds are emitted first and the insertelement chain follows as one
  ; contiguous run. The zero-threshold assertion group above (the opt
  ; invocation with -slp-threshold=0 at the top of the file) expects the same
  ; single <4 x float> fadd as the default group.

  ; Per-lane extracts and scalar adds.
  %a0 = extractelement <4 x float> %a, i32 0
  %b0 = extractelement <4 x float> %b, i32 0
  %c0 = fadd float %a0, %b0
  %a1 = extractelement <4 x float> %a, i32 1
  %b1 = extractelement <4 x float> %b, i32 1
  %c1 = fadd float %a1, %b1
  %a2 = extractelement <4 x float> %a, i32 2
  %b2 = extractelement <4 x float> %b, i32 2
  %c2 = fadd float %a2, %b2
  %a3 = extractelement <4 x float> %a, i32 3
  %b3 = extractelement <4 x float> %b, i32 3
  %c3 = fadd float %a3, %b3
  ; Build-vector chain that collects the four sums.
  %v0 = insertelement <4 x float> undef, float %c0, i32 0
  %v1 = insertelement <4 x float> %v0, float %c1, i32 1
  %v2 = insertelement <4 x float> %v1, float %c2, i32 2
  %v3 = insertelement <4 x float> %v2, float %c3, i32 3
  ret <4 x float> %v3
}

; Make sure we handle multiple trees that feed one build vector correctly.
define <4 x double> @multi_tree(double %w, double %x, double %y, double %z) {
; CHECK-LABEL: @multi_tree(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x double> undef, double %w, i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double %x, i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double %y, i32 2
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double %z, i32 3
; CHECK-NEXT:    [[TMP4:%.*]] = fadd <4 x double> [[TMP3]], <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>
; CHECK-NEXT:    [[TMP5:%.*]] = fmul <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x double> [[TMP5]], i32 0
; CHECK-NEXT:    [[I1:%.*]] = insertelement <4 x double> undef, double [[TMP6]], i32 3
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x double> [[TMP5]], i32 1
; CHECK-NEXT:    [[I2:%.*]] = insertelement <4 x double> [[I1]], double [[TMP7]], i32 2
; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x double> [[TMP5]], i32 2
; CHECK-NEXT:    [[I3:%.*]] = insertelement <4 x double> [[I2]], double [[TMP8]], i32 1
; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x double> [[TMP5]], i32 3
; CHECK-NEXT:    [[I4:%.*]] = insertelement <4 x double> [[I3]], double [[TMP9]], i32 0
; CHECK-NEXT:    ret <4 x double> [[I4]]
;
; ZEROTHRESH-LABEL: @multi_tree(
; ZEROTHRESH-NEXT:  entry:
; ZEROTHRESH-NEXT:    [[TMP0:%.*]] = insertelement <4 x double> undef, double %w, i32 0
; ZEROTHRESH-NEXT:    [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double %x, i32 1
; ZEROTHRESH-NEXT:    [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double %y, i32 2
; ZEROTHRESH-NEXT:    [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double %z, i32 3
; ZEROTHRESH-NEXT:    [[TMP4:%.*]] = fadd <4 x double> [[TMP3]], <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>
; ZEROTHRESH-NEXT:    [[TMP5:%.*]] = fmul <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[TMP4]]
; ZEROTHRESH-NEXT:    [[TMP6:%.*]] = extractelement <4 x double> [[TMP5]], i32 0
; ZEROTHRESH-NEXT:    [[I1:%.*]] = insertelement <4 x double> undef, double [[TMP6]], i32 3
; ZEROTHRESH-NEXT:    [[TMP7:%.*]] = extractelement <4 x double> [[TMP5]], i32 1
; ZEROTHRESH-NEXT:    [[I2:%.*]] = insertelement <4 x double> [[I1]], double [[TMP7]], i32 2
; ZEROTHRESH-NEXT:    [[TMP8:%.*]] = extractelement <4 x double> [[TMP5]], i32 2
; ZEROTHRESH-NEXT:    [[I3:%.*]] = insertelement <4 x double> [[I2]], double [[TMP8]], i32 1
; ZEROTHRESH-NEXT:    [[TMP9:%.*]] = extractelement <4 x double> [[TMP5]], i32 3
; ZEROTHRESH-NEXT:    [[I4:%.*]] = insertelement <4 x double> [[I3]], double [[TMP9]], i32 0
; ZEROTHRESH-NEXT:    ret <4 x double> [[I4]]
;
entry:
  ; Four scalar fadds (each argument plus a distinct constant) feed four
  ; scalar fmuls by 1.0. The products are inserted in reverse lane order:
  ; %t4 (derived from %w) goes to index 3 and %t7 (derived from %z) goes to
  ; index 0. Both assertion groups above expect a vector built from %w..%z,
  ; one <4 x double> fadd, one <4 x double> fmul, and extracts 0..3
  ; re-inserted at indices 3..0.
  %t0 = fadd double %w , 0.000000e+00
  %t1 = fadd double %x , 1.000000e+00
  %t2 = fadd double %y , 2.000000e+00
  %t3 = fadd double %z , 3.000000e+00
  ; Each fmul is immediately followed by the insert of its result.
  %t4 = fmul double %t0, 1.000000e+00
  %i1 = insertelement <4 x double> undef, double %t4, i32 3
  %t5 = fmul double %t1, 1.000000e+00
  %i2 = insertelement <4 x double> %i1, double %t5, i32 2
  %t6 = fmul double %t2, 1.000000e+00
  %i3 = insertelement <4 x double> %i2, double %t6, i32 1
  %t7 = fmul double %t3, 1.000000e+00
  %i4 = insertelement <4 x double> %i3, double %t7, i32 0
  ret <4 x double> %i4
}

; Eight-lane case: lane-wise fadd of two <8 x float> arguments whose results
; are collected by an insertelement chain. Both assertion groups expect a
; single <8 x float> fadd followed by one extract/insert pair per lane.
define <8 x float> @_vadd256(<8 x float> %a, <8 x float> %b) local_unnamed_addr #0 {
; CHECK-LABEL: @_vadd256(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = fadd <8 x float> %a, %b
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x float> [[TMP0]], i32 0
; CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <8 x float> undef, float [[TMP1]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[TMP0]], i32 1
; CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <8 x float> [[VECINIT_I]], float [[TMP2]], i32 1
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <8 x float> [[TMP0]], i32 2
; CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <8 x float> [[VECINIT1_I]], float [[TMP3]], i32 2
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x float> [[TMP0]], i32 3
; CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <8 x float> [[VECINIT2_I]], float [[TMP4]], i32 3
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x float> [[TMP0]], i32 4
; CHECK-NEXT:    [[VECINIT4_I:%.*]] = insertelement <8 x float> [[VECINIT3_I]], float [[TMP5]], i32 4
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <8 x float> [[TMP0]], i32 5
; CHECK-NEXT:    [[VECINIT5_I:%.*]] = insertelement <8 x float> [[VECINIT4_I]], float [[TMP6]], i32 5
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <8 x float> [[TMP0]], i32 6
; CHECK-NEXT:    [[VECINIT6_I:%.*]] = insertelement <8 x float> [[VECINIT5_I]], float [[TMP7]], i32 6
; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <8 x float> [[TMP0]], i32 7
; CHECK-NEXT:    [[VECINIT7_I:%.*]] = insertelement <8 x float> [[VECINIT6_I]], float [[TMP8]], i32 7
; CHECK-NEXT:    ret <8 x float> [[VECINIT7_I]]
;
; ZEROTHRESH-LABEL: @_vadd256(
; ZEROTHRESH-NEXT:  entry:
; ZEROTHRESH-NEXT:    [[TMP0:%.*]] = fadd <8 x float> %a, %b
; ZEROTHRESH-NEXT:    [[TMP1:%.*]] = extractelement <8 x float> [[TMP0]], i32 0
; ZEROTHRESH-NEXT:    [[VECINIT_I:%.*]] = insertelement <8 x float> undef, float [[TMP1]], i32 0
; ZEROTHRESH-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[TMP0]], i32 1
; ZEROTHRESH-NEXT:    [[VECINIT1_I:%.*]] = insertelement <8 x float> [[VECINIT_I]], float [[TMP2]], i32 1
; ZEROTHRESH-NEXT:    [[TMP3:%.*]] = extractelement <8 x float> [[TMP0]], i32 2
; ZEROTHRESH-NEXT:    [[VECINIT2_I:%.*]] = insertelement <8 x float> [[VECINIT1_I]], float [[TMP3]], i32 2
; ZEROTHRESH-NEXT:    [[TMP4:%.*]] = extractelement <8 x float> [[TMP0]], i32 3
; ZEROTHRESH-NEXT:    [[VECINIT3_I:%.*]] = insertelement <8 x float> [[VECINIT2_I]], float [[TMP4]], i32 3
; ZEROTHRESH-NEXT:    [[TMP5:%.*]] = extractelement <8 x float> [[TMP0]], i32 4
; ZEROTHRESH-NEXT:    [[VECINIT4_I:%.*]] = insertelement <8 x float> [[VECINIT3_I]], float [[TMP5]], i32 4
; ZEROTHRESH-NEXT:    [[TMP6:%.*]] = extractelement <8 x float> [[TMP0]], i32 5
; ZEROTHRESH-NEXT:    [[VECINIT5_I:%.*]] = insertelement <8 x float> [[VECINIT4_I]], float [[TMP6]], i32 5
; ZEROTHRESH-NEXT:    [[TMP7:%.*]] = extractelement <8 x float> [[TMP0]], i32 6
; ZEROTHRESH-NEXT:    [[VECINIT6_I:%.*]] = insertelement <8 x float> [[VECINIT5_I]], float [[TMP7]], i32 6
; ZEROTHRESH-NEXT:    [[TMP8:%.*]] = extractelement <8 x float> [[TMP0]], i32 7
; ZEROTHRESH-NEXT:    [[VECINIT7_I:%.*]] = insertelement <8 x float> [[VECINIT6_I]], float [[TMP8]], i32 7
; ZEROTHRESH-NEXT:    ret <8 x float> [[VECINIT7_I]]
;
  entry:
  ; Per-lane extracts from %a and %b and the scalar add, for lanes 0 through 7.
  %vecext = extractelement <8 x float> %a, i32 0
  %vecext1 = extractelement <8 x float> %b, i32 0
  %add = fadd float %vecext, %vecext1
  %vecext2 = extractelement <8 x float> %a, i32 1
  %vecext3 = extractelement <8 x float> %b, i32 1
  %add4 = fadd float %vecext2, %vecext3
  %vecext5 = extractelement <8 x float> %a, i32 2
  %vecext6 = extractelement <8 x float> %b, i32 2
  %add7 = fadd float %vecext5, %vecext6
  %vecext8 = extractelement <8 x float> %a, i32 3
  %vecext9 = extractelement <8 x float> %b, i32 3
  %add10 = fadd float %vecext8, %vecext9
  %vecext11 = extractelement <8 x float> %a, i32 4
  %vecext12 = extractelement <8 x float> %b, i32 4
  %add13 = fadd float %vecext11, %vecext12
  %vecext14 = extractelement <8 x float> %a, i32 5
  %vecext15 = extractelement <8 x float> %b, i32 5
  %add16 = fadd float %vecext14, %vecext15
  %vecext17 = extractelement <8 x float> %a, i32 6
  %vecext18 = extractelement <8 x float> %b, i32 6
  %add19 = fadd float %vecext17, %vecext18
  %vecext20 = extractelement <8 x float> %a, i32 7
  %vecext21 = extractelement <8 x float> %b, i32 7
  %add22 = fadd float %vecext20, %vecext21
  ; Build-vector chain: the eight sums are inserted at indices 0 through 7.
  %vecinit.i = insertelement <8 x float> undef, float %add, i32 0
  %vecinit1.i = insertelement <8 x float> %vecinit.i, float %add4, i32 1
  %vecinit2.i = insertelement <8 x float> %vecinit1.i, float %add7, i32 2
  %vecinit3.i = insertelement <8 x float> %vecinit2.i, float %add10, i32 3
  %vecinit4.i = insertelement <8 x float> %vecinit3.i, float %add13, i32 4
  %vecinit5.i = insertelement <8 x float> %vecinit4.i, float %add16, i32 5
  %vecinit6.i = insertelement <8 x float> %vecinit5.i, float %add19, i32 6
  %vecinit7.i = insertelement <8 x float> %vecinit6.i, float %add22, i32 7
  ret <8 x float> %vecinit7.i
}

; Attribute group #0, referenced by the functions in this file that are
; declared with "#0" (for example @simple_select and @_vadd256).
attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }