; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx -verify | FileCheck %s -check-prefix=ENABLED
;
; Without supernode operand reordering, this does not get fully vectorized.
; S[0] = (A[0] + B[0]) + C[0]
; S[1] = (B[1] + C[1]) + A[1]
;
; Both lanes sum one element from each of A, B and C, but associate them in a
; different order, so only B lines up lane-for-lane in the scalar code.
; The assertions below expect B[0:1] to be loaded as one <2 x double>, and the
; pairs {A[0], C[1]} and {C[0], A[1]} to be gathered with insertelement as the
; remaining operands of the two vector fadds.
define void @test_supernode_add(double* %Aarray, double* %Barray, double *%Carray, double *%Sarray) {
; ENABLED-LABEL: @test_supernode_add(
; ENABLED-NEXT:  entry:
; ENABLED-NEXT:    [[IDXA0:%.*]] = getelementptr inbounds double, double* [[AARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXA1:%.*]] = getelementptr inbounds double, double* [[AARRAY]], i64 1
; ENABLED-NEXT:    [[IDXB0:%.*]] = getelementptr inbounds double, double* [[BARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXB1:%.*]] = getelementptr inbounds double, double* [[BARRAY]], i64 1
; ENABLED-NEXT:    [[IDXC0:%.*]] = getelementptr inbounds double, double* [[CARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXC1:%.*]] = getelementptr inbounds double, double* [[CARRAY]], i64 1
; ENABLED-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds double, double* [[SARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXS1:%.*]] = getelementptr inbounds double, double* [[SARRAY]], i64 1
; ENABLED-NEXT:    [[A0:%.*]] = load double, double* [[IDXA0]], align 8
; ENABLED-NEXT:    [[A1:%.*]] = load double, double* [[IDXA1]], align 8
; ENABLED-NEXT:    [[TMP0:%.*]] = bitcast double* [[IDXB0]] to <2 x double>*
; ENABLED-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
; ENABLED-NEXT:    [[C0:%.*]] = load double, double* [[IDXC0]], align 8
; ENABLED-NEXT:    [[C1:%.*]] = load double, double* [[IDXC1]], align 8
; ENABLED-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0
; ENABLED-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[C1]], i32 1
; ENABLED-NEXT:    [[TMP4:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP1]]
; ENABLED-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0
; ENABLED-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[A1]], i32 1
; ENABLED-NEXT:    [[TMP7:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP6]]
; ENABLED-NEXT:    [[TMP8:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
; ENABLED-NEXT:    store <2 x double> [[TMP7]], <2 x double>* [[TMP8]], align 8
; ENABLED-NEXT:    ret void
;
entry:
  %idxA0 = getelementptr inbounds double, double* %Aarray, i64 0
  %idxA1 = getelementptr inbounds double, double* %Aarray, i64 1
  %idxB0 = getelementptr inbounds double, double* %Barray, i64 0
  %idxB1 = getelementptr inbounds double, double* %Barray, i64 1
  %idxC0 = getelementptr inbounds double, double* %Carray, i64 0
  %idxC1 = getelementptr inbounds double, double* %Carray, i64 1
  %idxS0 = getelementptr inbounds double, double* %Sarray, i64 0
  %idxS1 = getelementptr inbounds double, double* %Sarray, i64 1

  %A0 = load double, double *%idxA0, align 8
  %A1 = load double, double *%idxA1, align 8

  %B0 = load double, double *%idxB0, align 8
  %B1 = load double, double *%idxB1, align 8

  %C0 = load double, double *%idxC0, align 8
  %C1 = load double, double *%idxC1, align 8

  ; Lane 0 adds A then C; lane 1 adds C then A.
  %addA0B0 = fadd fast double %A0, %B0
  %addB1C1 = fadd fast double %B1, %C1
  %add0 = fadd fast double %addA0B0, %C0
  %add1 = fadd fast double %addB1C1, %A1
  store double %add0, double *%idxS0, align 8
  store double %add1, double *%idxS1, align 8
  ret void
}


; Without supernode operand reordering, this does not get fully vectorized.
; S[0] = (A[0] - B[0]) + C[0]
; S[1] = (C[1] - B[1]) + A[1]
;
; B is the subtrahend in both lanes; A and C swap roles between the lanes.
; The assertions below expect B[0:1] to be loaded as one <2 x double> and
; subtracted from the gathered pair {A[0], C[1]}, and the result to be added
; to the gathered pair {C[0], A[1]}.
define void @test_supernode_addsub(double* %Aarray, double* %Barray, double *%Carray, double *%Sarray) {
; ENABLED-LABEL: @test_supernode_addsub(
; ENABLED-NEXT:  entry:
; ENABLED-NEXT:    [[IDXA0:%.*]] = getelementptr inbounds double, double* [[AARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXA1:%.*]] = getelementptr inbounds double, double* [[AARRAY]], i64 1
; ENABLED-NEXT:    [[IDXB0:%.*]] = getelementptr inbounds double, double* [[BARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXB1:%.*]] = getelementptr inbounds double, double* [[BARRAY]], i64 1
; ENABLED-NEXT:    [[IDXC0:%.*]] = getelementptr inbounds double, double* [[CARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXC1:%.*]] = getelementptr inbounds double, double* [[CARRAY]], i64 1
; ENABLED-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds double, double* [[SARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXS1:%.*]] = getelementptr inbounds double, double* [[SARRAY]], i64 1
; ENABLED-NEXT:    [[A0:%.*]] = load double, double* [[IDXA0]], align 8
; ENABLED-NEXT:    [[A1:%.*]] = load double, double* [[IDXA1]], align 8
; ENABLED-NEXT:    [[TMP0:%.*]] = bitcast double* [[IDXB0]] to <2 x double>*
; ENABLED-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
; ENABLED-NEXT:    [[C0:%.*]] = load double, double* [[IDXC0]], align 8
; ENABLED-NEXT:    [[C1:%.*]] = load double, double* [[IDXC1]], align 8
; ENABLED-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0
; ENABLED-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[C1]], i32 1
; ENABLED-NEXT:    [[TMP4:%.*]] = fsub fast <2 x double> [[TMP3]], [[TMP1]]
; ENABLED-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0
; ENABLED-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[A1]], i32 1
; ENABLED-NEXT:    [[TMP7:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP6]]
; ENABLED-NEXT:    [[TMP8:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
; ENABLED-NEXT:    store <2 x double> [[TMP7]], <2 x double>* [[TMP8]], align 8
; ENABLED-NEXT:    ret void
;
entry:
  %idxA0 = getelementptr inbounds double, double* %Aarray, i64 0
  %idxA1 = getelementptr inbounds double, double* %Aarray, i64 1
  %idxB0 = getelementptr inbounds double, double* %Barray, i64 0
  %idxB1 = getelementptr inbounds double, double* %Barray, i64 1
  %idxC0 = getelementptr inbounds double, double* %Carray, i64 0
  %idxC1 = getelementptr inbounds double, double* %Carray, i64 1
  %idxS0 = getelementptr inbounds double, double* %Sarray, i64 0
  %idxS1 = getelementptr inbounds double, double* %Sarray, i64 1

  %A0 = load double, double *%idxA0, align 8
  %A1 = load double, double *%idxA1, align 8

  %B0 = load double, double *%idxB0, align 8
  %B1 = load double, double *%idxB1, align 8

  %C0 = load double, double *%idxC0, align 8
  %C1 = load double, double *%idxC1, align 8

  ; Lane 0 computes (A - B) + C; lane 1 computes (C - B) + A.
  %subA0B0 = fsub fast double %A0, %B0
  %subC1B1 = fsub fast double %C1, %B1
  %add0 = fadd fast double %subA0B0, %C0
  %add1 = fadd fast double %subC1B1, %A1
  store double %add0, double *%idxS0, align 8
  store double %add1, double *%idxS1, align 8
  ret void
}

; Without supernode operand reordering, this does not get fully vectorized.
; This checks that the super-node works with alternate sequences.
;
; S[0] = (A[0] - B[0]) - C[0]
; S[1] = (B[1] + C[1]) + A[1]
;
; Lane 0 is an fsub/fsub chain and lane 1 is an fadd/fadd chain. The current
; assertions expect the scalar loads, arithmetic and stores to be left as they
; are, i.e. no vector instructions are emitted for this function.
define void @test_supernode_addsub_alt(double* %Aarray, double* %Barray, double *%Carray, double *%Sarray) {
; ENABLED-LABEL: @test_supernode_addsub_alt(
; ENABLED-NEXT:  entry:
; ENABLED-NEXT:    [[IDXA0:%.*]] = getelementptr inbounds double, double* [[AARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXA1:%.*]] = getelementptr inbounds double, double* [[AARRAY]], i64 1
; ENABLED-NEXT:    [[IDXB0:%.*]] = getelementptr inbounds double, double* [[BARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXB1:%.*]] = getelementptr inbounds double, double* [[BARRAY]], i64 1
; ENABLED-NEXT:    [[IDXC0:%.*]] = getelementptr inbounds double, double* [[CARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXC1:%.*]] = getelementptr inbounds double, double* [[CARRAY]], i64 1
; ENABLED-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds double, double* [[SARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXS1:%.*]] = getelementptr inbounds double, double* [[SARRAY]], i64 1
; ENABLED-NEXT:    [[A0:%.*]] = load double, double* [[IDXA0]], align 8
; ENABLED-NEXT:    [[A1:%.*]] = load double, double* [[IDXA1]], align 8
; ENABLED-NEXT:    [[B0:%.*]] = load double, double* [[IDXB0]], align 8
; ENABLED-NEXT:    [[B1:%.*]] = load double, double* [[IDXB1]], align 8
; ENABLED-NEXT:    [[C0:%.*]] = load double, double* [[IDXC0]], align 8
; ENABLED-NEXT:    [[C1:%.*]] = load double, double* [[IDXC1]], align 8
; ENABLED-NEXT:    [[SUBA0B0:%.*]] = fsub fast double [[A0]], [[B0]]
; ENABLED-NEXT:    [[ADDB1C1:%.*]] = fadd fast double [[B1]], [[C1]]
; ENABLED-NEXT:    [[SUB0:%.*]] = fsub fast double [[SUBA0B0]], [[C0]]
; ENABLED-NEXT:    [[ADD1:%.*]] = fadd fast double [[ADDB1C1]], [[A1]]
; ENABLED-NEXT:    store double [[SUB0]], double* [[IDXS0]], align 8
; ENABLED-NEXT:    store double [[ADD1]], double* [[IDXS1]], align 8
; ENABLED-NEXT:    ret void
;
entry:
  %idxA0 = getelementptr inbounds double, double* %Aarray, i64 0
  %idxA1 = getelementptr inbounds double, double* %Aarray, i64 1
  %idxB0 = getelementptr inbounds double, double* %Barray, i64 0
  %idxB1 = getelementptr inbounds double, double* %Barray, i64 1
  %idxC0 = getelementptr inbounds double, double* %Carray, i64 0
  %idxC1 = getelementptr inbounds double, double* %Carray, i64 1
  %idxS0 = getelementptr inbounds double, double* %Sarray, i64 0
  %idxS1 = getelementptr inbounds double, double* %Sarray, i64 1

  %A0 = load double, double *%idxA0, align 8
  %A1 = load double, double *%idxA1, align 8

  %B0 = load double, double *%idxB0, align 8
  %B1 = load double, double *%idxB1, align 8

  %C0 = load double, double *%idxC0, align 8
  %C1 = load double, double *%idxC1, align 8

  ; Lane 0 uses fsub at both levels; lane 1 uses fadd at both levels.
  %subA0B0 = fsub fast double %A0, %B0
  %addB1C1 = fadd fast double %B1, %C1
  %sub0 = fsub fast double %subA0B0, %C0
  %add1 = fadd fast double %addB1C1, %A1
  store double %sub0, double *%idxS0, align 8
  store double %add1, double *%idxS1, align 8
  ret void
}

; This checks that vectorizeTree() works correctly with the supernode
; and does not generate uses before defs.
; If all of the operands of the supernode are vectorizable, then the scheduler
; will fix their position in the program. If not, then the scheduler may not
; touch them, leading to uses before defs.
;
;  A0 = ...
;  C = ...
;  t1 = A0 + C
;  B0 = ...
;  t2 = t1 + B0
;  A1 = ...
;  B1 = ...
;  t3 = A1 + B1
;  D = ...
;  t4 = t3 + D
;
;
;  A0  C   A1  B1              A0  C   A1  D               A0:1  C,D
;   \ /      \ /    Reorder      \ /     \ /    Bundles        \ /
; t1 + B0  t3 + D   ------->   t1 + B0  t3 + B1  ------>   t1:3 + B0:1
;    |/       |/                  |/       |/                  |/
;  t2 +     t4 +                t2 +     t4 +                t2:4 +
;
; After reordering, 'D' conceptually becomes an operand of t3:
; t3 = A1 + D
; But D is defined *after* its use.
;
; The assertions below expect A[0:1] to be loaded as one <2 x double>, the
; pairs {C, B1} and {B0, D} to be gathered with insertelement, and the scalar
; load of D to appear after the first vector fadd but before the insertelement
; that consumes it, so every use follows its definition.
define void @supernode_scheduling(double* %Aarray, double* %Barray, double *%Carray, double *%Darray, double *%Sarray) {
; ENABLED-LABEL: @supernode_scheduling(
; ENABLED-NEXT:  entry:
; ENABLED-NEXT:    [[IDXA0:%.*]] = getelementptr inbounds double, double* [[AARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXA1:%.*]] = getelementptr inbounds double, double* [[AARRAY]], i64 1
; ENABLED-NEXT:    [[IDXB0:%.*]] = getelementptr inbounds double, double* [[BARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXB1:%.*]] = getelementptr inbounds double, double* [[BARRAY]], i64 1
; ENABLED-NEXT:    [[IDXC:%.*]] = getelementptr inbounds double, double* [[CARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXD:%.*]] = getelementptr inbounds double, double* [[DARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds double, double* [[SARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXS1:%.*]] = getelementptr inbounds double, double* [[SARRAY]], i64 1
; ENABLED-NEXT:    [[C:%.*]] = load double, double* [[IDXC]], align 8
; ENABLED-NEXT:    [[B0:%.*]] = load double, double* [[IDXB0]], align 8
; ENABLED-NEXT:    [[TMP0:%.*]] = bitcast double* [[IDXA0]] to <2 x double>*
; ENABLED-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
; ENABLED-NEXT:    [[B1:%.*]] = load double, double* [[IDXB1]], align 8
; ENABLED-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[C]], i32 0
; ENABLED-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[B1]], i32 1
; ENABLED-NEXT:    [[TMP4:%.*]] = fadd fast <2 x double> [[TMP1]], [[TMP3]]
; ENABLED-NEXT:    [[D:%.*]] = load double, double* [[IDXD]], align 8
; ENABLED-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> poison, double [[B0]], i32 0
; ENABLED-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[D]], i32 1
; ENABLED-NEXT:    [[TMP7:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP6]]
; ENABLED-NEXT:    [[TMP8:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
; ENABLED-NEXT:    store <2 x double> [[TMP7]], <2 x double>* [[TMP8]], align 8
; ENABLED-NEXT:    ret void
;
entry:
  %idxA0 = getelementptr inbounds double, double* %Aarray, i64 0
  %idxA1 = getelementptr inbounds double, double* %Aarray, i64 1
  %idxB0 = getelementptr inbounds double, double* %Barray, i64 0
  %idxB1 = getelementptr inbounds double, double* %Barray, i64 1
  %idxC = getelementptr inbounds double, double* %Carray, i64 0
  %idxD = getelementptr inbounds double, double* %Darray, i64 0
  %idxS0 = getelementptr inbounds double, double* %Sarray, i64 0
  %idxS1 = getelementptr inbounds double, double* %Sarray, i64 1


  ; Loads are deliberately interleaved with their users: D is loaded last,
  ; after t3 has already been computed.
  %A0 = load double, double *%idxA0, align 8
  %C = load double, double *%idxC, align 8
  %t1 = fadd fast double %A0, %C
  %B0 = load double, double *%idxB0, align 8
  %t2 = fadd fast double %t1, %B0
  %A1 = load double, double *%idxA1, align 8
  %B1 = load double, double *%idxB1, align 8
  %t3 = fadd fast double %A1, %B1
  %D = load double, double *%idxD, align 8
  %t4 = fadd fast double %t3, %D

  store double %t2, double *%idxS0, align 8
  store double %t4, double *%idxS1, align 8
  ret void
}


; The SLP scheduler has trouble moving instructions across blocks.
; Even though we can build a SuperNode for this example, we should not because the scheduler
; cannot handle the cross-block instruction motion that is required once the operands of the
; SuperNode are reordered.
;
; entry:
;  A0 = ...
;  B1 = ...
;  Tmp0 = A0 + 2.0
;  Tmp1 = B1 + 2.0
;
; bb:
;  A1 = ...
;  B0 = ...
;  S[0] = Tmp0 + B0
;  S[1] = Tmp1 + A1
;
; The assertions below expect each basic block to be vectorized on its own:
; the first block gathers {A0, B1} and adds the constant splat, and the second
; block gathers {B0, A1} and adds it to that result. No loaded value is used
; outside the block that loads it.
define void @supernode_scheduling_cross_block(double* %Aarray, double* %Barray, double *%Sarray) {
; ENABLED-LABEL: @supernode_scheduling_cross_block(
; ENABLED-NEXT:  entry:
; ENABLED-NEXT:    [[IDXA0:%.*]] = getelementptr inbounds double, double* [[AARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXA1:%.*]] = getelementptr inbounds double, double* [[AARRAY]], i64 1
; ENABLED-NEXT:    [[IDXB0:%.*]] = getelementptr inbounds double, double* [[BARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXB1:%.*]] = getelementptr inbounds double, double* [[BARRAY]], i64 1
; ENABLED-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds double, double* [[SARRAY:%.*]], i64 0
; ENABLED-NEXT:    [[IDXS1:%.*]] = getelementptr inbounds double, double* [[SARRAY]], i64 1
; ENABLED-NEXT:    [[A0:%.*]] = load double, double* [[IDXA0]], align 8
; ENABLED-NEXT:    [[B1:%.*]] = load double, double* [[IDXB1]], align 8
; ENABLED-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0
; ENABLED-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[B1]], i32 1
; ENABLED-NEXT:    [[TMP2:%.*]] = fadd fast <2 x double> [[TMP1]], <double 2.000000e+00, double 2.000000e+00>
; ENABLED-NEXT:    br label [[BB:%.*]]
; ENABLED:       bb:
; ENABLED-NEXT:    [[A1:%.*]] = load double, double* [[IDXA1]], align 8
; ENABLED-NEXT:    [[B0:%.*]] = load double, double* [[IDXB0]], align 8
; ENABLED-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> poison, double [[B0]], i32 0
; ENABLED-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[A1]], i32 1
; ENABLED-NEXT:    [[TMP5:%.*]] = fadd fast <2 x double> [[TMP2]], [[TMP4]]
; ENABLED-NEXT:    [[TMP6:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
; ENABLED-NEXT:    store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
; ENABLED-NEXT:    ret void
;
entry:
  %idxA0 = getelementptr inbounds double, double* %Aarray, i64 0
  %idxA1 = getelementptr inbounds double, double* %Aarray, i64 1
  %idxB0 = getelementptr inbounds double, double* %Barray, i64 0
  %idxB1 = getelementptr inbounds double, double* %Barray, i64 1
  %idxS0 = getelementptr inbounds double, double* %Sarray, i64 0
  %idxS1 = getelementptr inbounds double, double* %Sarray, i64 1

  ; First-level adds live in the entry block and use A0 / B1.
  %A0 = load double, double *%idxA0, align 8
  %B1 = load double, double *%idxB1, align 8
  %Tmp0 = fadd fast double %A0, 2.0
  %Tmp1 = fadd fast double %B1, 2.0
  br label %bb

bb:
  ; Second-level adds live in %bb and use the opposite elements, B0 / A1.
  %A1 = load double, double *%idxA1, align 8
  %B0 = load double, double *%idxB0, align 8

  %Sum0 = fadd fast double %Tmp0, %B0
  %Sum1 = fadd fast double %Tmp1, %A1

  store double %Sum0, double *%idxS0, align 8
  store double %Sum1, double *%idxS1, align 8
  ret void
}