; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -slp-vectorizer -slp-threshold=-18 -dce -instcombine -pass-remarks-output=%t < %s | FileCheck %s
; RUN: cat %t | FileCheck -check-prefix=YAML %s
; RUN: opt -S -passes='slp-vectorizer,dce,instcombine' -slp-threshold=-18 -pass-remarks-output=%t < %s | FileCheck %s
; RUN: cat %t | FileCheck -check-prefix=YAML %s


target datalayout = "e-m:e-i32:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"

; These tests check that we remove from consideration pairs of seed
; getelementptrs when they are known to have a constant difference. Such pairs
; are likely not good candidates for vectorization since one can be computed
; from the other. We use an unprofitable threshold to force vectorization.
;
; int getelementptr(int *g, int n, int w, int x, int y, int z) {
;   int sum = 0;
;   for (int i = 0; i < n ; ++i) {
;     sum += g[2*i + w]; sum += g[2*i + x];
;     sum += g[2*i + y]; sum += g[2*i + z];
;   }
;   return sum;
; }
;

; In @getelementptr_4x32 the four index offsets are 0, %x, %y and %z. The
; expected output computes all four indices with a single <4 x i32> add, and
; two vectorization remarks are expected for the function.

; YAML-LABEL: Function: getelementptr_4x32
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'SLP vectorized with cost '
; YAML-NEXT: - Cost: '11'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '5'

; YAML: --- !Passed
; YAML-NEXT: Pass: slp-vectorizer
; YAML-NEXT: Name: VectorizedList
; YAML-NEXT: Function: getelementptr_4x32
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'SLP vectorized with cost '
; YAML-NEXT: - Cost: '16'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '3'

define i32 @getelementptr_4x32(i32* nocapture readonly %g, i32 %n, i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @getelementptr_4x32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP31:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP31]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK:       for.body.preheader:
; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>, i32 [[X:%.*]], i32 1
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> [[TMP0]], i32 [[Y:%.*]], i32 2
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[Z:%.*]], i32 3
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.cond.cleanup.loopexit:
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[TMP20:%.*]], i32 1
; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP3]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ]
; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
; CHECK:       for.body:
; CHECK-NEXT:    [[TMP4:%.*]] = phi <2 x i32> [ zeroinitializer, [[FOR_BODY_PREHEADER]] ], [ [[TMP20]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0
; CHECK-NEXT:    [[T4:%.*]] = shl nsw i32 [[TMP5]], 1
; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[T4]], i32 0
; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> undef, <4 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP8:%.*]] = add nsw <4 x i32> [[TMP7]], [[TMP2]]
; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i32> [[TMP8]], i32 0
; CHECK-NEXT:    [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[G:%.*]], i64 [[TMP10]]
; CHECK-NEXT:    [[T6:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i32> [[TMP4]], i32 1
; CHECK-NEXT:    [[ADD1:%.*]] = add nsw i32 [[T6]], [[TMP11]]
; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i32> [[TMP8]], i32 1
; CHECK-NEXT:    [[TMP13:%.*]] = sext i32 [[TMP12]] to i64
; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP13]]
; CHECK-NEXT:    [[T8:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4
; CHECK-NEXT:    [[ADD6:%.*]] = add nsw i32 [[ADD1]], [[T8]]
; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x i32> [[TMP8]], i32 2
; CHECK-NEXT:    [[TMP15:%.*]] = sext i32 [[TMP14]] to i64
; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP15]]
; CHECK-NEXT:    [[T10:%.*]] = load i32, i32* [[ARRAYIDX10]], align 4
; CHECK-NEXT:    [[ADD11:%.*]] = add nsw i32 [[ADD6]], [[T10]]
; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <4 x i32> [[TMP8]], i32 3
; CHECK-NEXT:    [[TMP17:%.*]] = sext i32 [[TMP16]] to i64
; CHECK-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP17]]
; CHECK-NEXT:    [[T12:%.*]] = load i32, i32* [[ARRAYIDX15]], align 4
; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[ADD11]], i32 1
; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <2 x i32> <i32 1, i32 poison>, i32 [[T12]], i32 1
; CHECK-NEXT:    [[TMP20]] = add nsw <2 x i32> [[TMP18]], [[TMP19]]
; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <2 x i32> [[TMP20]], i32 0
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[TMP21]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]]
;
entry:
  %cmp31 = icmp sgt i32 %n, 0
  br i1 %cmp31, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:
  br label %for.body

for.cond.cleanup.loopexit:
  br label %for.cond.cleanup

for.cond.cleanup:
  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add16, %for.cond.cleanup.loopexit ]
  ret i32 %sum.0.lcssa

for.body:
  %indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %sum.032 = phi i32 [ 0, %for.body.preheader ], [ %add16, %for.body ]
  %t4 = shl nsw i32 %indvars.iv, 1
  %t5 = add nsw i32 %t4, 0
  %arrayidx = getelementptr inbounds i32, i32* %g, i32 %t5
  %t6 = load i32, i32* %arrayidx, align 4
  %add1 = add nsw i32 %t6, %sum.032
  %t7 = add nsw i32 %t4, %x
  %arrayidx5 = getelementptr inbounds i32, i32* %g, i32 %t7
  %t8 = load i32, i32* %arrayidx5, align 4
  %add6 = add nsw i32 %add1, %t8
  %t9 = add nsw i32 %t4, %y
  %arrayidx10 = getelementptr inbounds i32, i32* %g, i32 %t9
  %t10 = load i32, i32* %arrayidx10, align 4
  %add11 = add nsw i32 %add6, %t10
  %t11 = add nsw i32 %t4, %z
  %arrayidx15 = getelementptr inbounds i32, i32* %g, i32 %t11
  %t12 = load i32, i32* %arrayidx15, align 4
  %add16 = add nsw i32 %add11, %t12
  %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
  %exitcond = icmp eq i32 %indvars.iv.next , %n
  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
}

; In @getelementptr_2x32 the index offsets are 0, 1, %y and %z. The first two
; differ by a constant, and the expected output keeps those two indices scalar
; while computing only the %y/%z indices with a <2 x i32> add.

; YAML-LABEL: Function: getelementptr_2x32
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'SLP vectorized with cost '
; YAML-NEXT: - Cost: '11'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '5'

; YAML: --- !Passed
; YAML-NEXT: Pass: slp-vectorizer
; YAML-NEXT: Name: VectorizedList
; YAML-NEXT: Function: getelementptr_2x32
; YAML-NEXT: Args:
; YAML-NEXT: - String: 'SLP vectorized with cost '
; YAML-NEXT: - Cost: '6'
; YAML-NEXT: - String: ' and with tree size '
; YAML-NEXT: - TreeSize: '3'

define i32 @getelementptr_2x32(i32* nocapture readonly %g, i32 %n, i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @getelementptr_2x32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP31:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP31]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK:       for.body.preheader:
; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[Z:%.*]], i32 1
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.cond.cleanup.loopexit:
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i32> [[TMP17:%.*]], i32 1
; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ]
; CHECK-NEXT:    ret i32 [[SUM_0_LCSSA]]
; CHECK:       for.body:
; CHECK-NEXT:    [[TMP3:%.*]] = phi <2 x i32> [ zeroinitializer, [[FOR_BODY_PREHEADER]] ], [ [[TMP17]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i32> [[TMP3]], i32 0
; CHECK-NEXT:    [[T4:%.*]] = shl nsw i32 [[TMP4]], 1
; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[T4]] to i64
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[G:%.*]], i64 [[TMP5]]
; CHECK-NEXT:    [[T6:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1
; CHECK-NEXT:    [[ADD1:%.*]] = add nsw i32 [[T6]], [[TMP6]]
; CHECK-NEXT:    [[T7:%.*]] = or i32 [[T4]], 1
; CHECK-NEXT:    [[TMP7:%.*]] = sext i32 [[T7]] to i64
; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP7]]
; CHECK-NEXT:    [[T8:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4
; CHECK-NEXT:    [[ADD6:%.*]] = add nsw i32 [[ADD1]], [[T8]]
; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[T4]], i32 0
; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> undef, <2 x i32> zeroinitializer
; CHECK-NEXT:    [[TMP10:%.*]] = add nsw <2 x i32> [[TMP9]], [[TMP1]]
; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i32> [[TMP10]], i32 0
; CHECK-NEXT:    [[TMP12:%.*]] = sext i32 [[TMP11]] to i64
; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP12]]
; CHECK-NEXT:    [[T10:%.*]] = load i32, i32* [[ARRAYIDX10]], align 4
; CHECK-NEXT:    [[ADD11:%.*]] = add nsw i32 [[ADD6]], [[T10]]
; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x i32> [[TMP10]], i32 1
; CHECK-NEXT:    [[TMP14:%.*]] = sext i32 [[TMP13]] to i64
; CHECK-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds i32, i32* [[G]], i64 [[TMP14]]
; CHECK-NEXT:    [[T12:%.*]] = load i32, i32* [[ARRAYIDX15]], align 4
; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[ADD11]], i32 1
; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <2 x i32> <i32 1, i32 poison>, i32 [[T12]], i32 1
; CHECK-NEXT:    [[TMP17]] = add nsw <2 x i32> [[TMP15]], [[TMP16]]
; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <2 x i32> [[TMP17]], i32 0
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[TMP18]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]]
;
entry:
  %cmp31 = icmp sgt i32 %n, 0
  br i1 %cmp31, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:
  br label %for.body

for.cond.cleanup.loopexit:
  br label %for.cond.cleanup

for.cond.cleanup:
  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add16, %for.cond.cleanup.loopexit ]
  ret i32 %sum.0.lcssa

for.body:
  %indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %sum.032 = phi i32 [ 0, %for.body.preheader ], [ %add16, %for.body ]
  %t4 = shl nsw i32 %indvars.iv, 1
  %t5 = add nsw i32 %t4, 0
  %arrayidx = getelementptr inbounds i32, i32* %g, i32 %t5
  %t6 = load i32, i32* %arrayidx, align 4
  %add1 = add nsw i32 %t6, %sum.032
  %t7 = add nsw i32 %t4, 1
  %arrayidx5 = getelementptr inbounds i32, i32* %g, i32 %t7
  %t8 = load i32, i32* %arrayidx5, align 4
  %add6 = add nsw i32 %add1, %t8
  %t9 = add nsw i32 %t4, %y
  %arrayidx10 = getelementptr inbounds i32, i32* %g, i32 %t9
  %t10 = load i32, i32* %arrayidx10, align 4
  %add11 = add nsw i32 %add6, %t10
  %t11 = add nsw i32 %t4, %z
  %arrayidx15 = getelementptr inbounds i32, i32* %g, i32 %t11
  %t12 = load i32, i32* %arrayidx15, align 4
  %add16 = add nsw i32 %add11, %t12
  %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
  %exitcond = icmp eq i32 %indvars.iv.next , %n
  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
}

@global = internal global { i32* } zeroinitializer, align 8

; Make sure we vectorize to maximize the load width when loading i16 and
; extending it for compute operations. The expected output loads each operand
; as one <8 x i16> vector and zero-extends it to <8 x i32> before the subtract;
; the eight dependent i32 loads from the pointer read out of @global stay
; scalar.
define void @test_i16_extend(i16* %p.1, i16* %p.2, i32 %idx.i32) {
; CHECK-LABEL: @test_i16_extend(
; CHECK-NEXT:    [[P_0:%.*]] = load i32*, i32** getelementptr inbounds ({ i32* }, { i32* }* @global, i64 0, i32 0), align 8
; CHECK-NEXT:    [[IDX_0:%.*]] = zext i32 [[IDX_I32:%.*]] to i64
; CHECK-NEXT:    [[T53:%.*]] = getelementptr inbounds i16, i16* [[P_1:%.*]], i64 [[IDX_0]]
; CHECK-NEXT:    [[T56:%.*]] = getelementptr inbounds i16, i16* [[P_2:%.*]], i64 [[IDX_0]]
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[T53]] to <8 x i16>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* [[TMP1]], align 2
; CHECK-NEXT:    [[TMP3:%.*]] = zext <8 x i16> [[TMP2]] to <8 x i32>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[T56]] to <8 x i16>*
; CHECK-NEXT:    [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[TMP4]], align 2
; CHECK-NEXT:    [[TMP6:%.*]] = zext <8 x i16> [[TMP5]] to <8 x i32>
; CHECK-NEXT:    [[TMP7:%.*]] = sub nsw <8 x i32> [[TMP3]], [[TMP6]]
; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <8 x i32> [[TMP7]], i32 0
; CHECK-NEXT:    [[TMP9:%.*]] = sext i32 [[TMP8]] to i64
; CHECK-NEXT:    [[T60:%.*]] = getelementptr inbounds i32, i32* [[P_0]], i64 [[TMP9]]
; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[T60]], align 4
; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <8 x i32> [[TMP7]], i32 1
; CHECK-NEXT:    [[TMP11:%.*]] = sext i32 [[TMP10]] to i64
; CHECK-NEXT:    [[T71:%.*]] = getelementptr inbounds i32, i32* [[P_0]], i64 [[TMP11]]
; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[T71]], align 4
; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <8 x i32> [[TMP7]], i32 2
; CHECK-NEXT:    [[TMP13:%.*]] = sext i32 [[TMP12]] to i64
; CHECK-NEXT:    [[T82:%.*]] = getelementptr inbounds i32, i32* [[P_0]], i64 [[TMP13]]
; CHECK-NEXT:    [[L_3:%.*]] = load i32, i32* [[T82]], align 4
; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <8 x i32> [[TMP7]], i32 3
; CHECK-NEXT:    [[TMP15:%.*]] = sext i32 [[TMP14]] to i64
; CHECK-NEXT:    [[T93:%.*]] = getelementptr inbounds i32, i32* [[P_0]], i64 [[TMP15]]
; CHECK-NEXT:    [[L_4:%.*]] = load i32, i32* [[T93]], align 4
; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <8 x i32> [[TMP7]], i32 4
; CHECK-NEXT:    [[TMP17:%.*]] = sext i32 [[TMP16]] to i64
; CHECK-NEXT:    [[T104:%.*]] = getelementptr inbounds i32, i32* [[P_0]], i64 [[TMP17]]
; CHECK-NEXT:    [[L_5:%.*]] = load i32, i32* [[T104]], align 4
; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <8 x i32> [[TMP7]], i32 5
; CHECK-NEXT:    [[TMP19:%.*]] = sext i32 [[TMP18]] to i64
; CHECK-NEXT:    [[T115:%.*]] = getelementptr inbounds i32, i32* [[P_0]], i64 [[TMP19]]
; CHECK-NEXT:    [[L_6:%.*]] = load i32, i32* [[T115]], align 4
; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <8 x i32> [[TMP7]], i32 6
; CHECK-NEXT:    [[TMP21:%.*]] = sext i32 [[TMP20]] to i64
; CHECK-NEXT:    [[T126:%.*]] = getelementptr inbounds i32, i32* [[P_0]], i64 [[TMP21]]
; CHECK-NEXT:    [[L_7:%.*]] = load i32, i32* [[T126]], align 4
; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <8 x i32> [[TMP7]], i32 7
; CHECK-NEXT:    [[TMP23:%.*]] = sext i32 [[TMP22]] to i64
; CHECK-NEXT:    [[T137:%.*]] = getelementptr inbounds i32, i32* [[P_0]], i64 [[TMP23]]
; CHECK-NEXT:    [[L_8:%.*]] = load i32, i32* [[T137]], align 4
; CHECK-NEXT:    call void @use(i32 [[L_1]], i32 [[L_2]], i32 [[L_3]], i32 [[L_4]], i32 [[L_5]], i32 [[L_6]], i32 [[L_7]], i32 [[L_8]])
; CHECK-NEXT:    ret void
;
  %g = getelementptr inbounds { i32*}, { i32 *}* @global, i64 0, i32 0
  %p.0 = load i32*, i32** %g, align 8

  %idx.0 = zext i32 %idx.i32 to i64
  %idx.1 = add nsw i64 %idx.0, 1
  %idx.2 = add nsw i64 %idx.0, 2
  %idx.3 = add nsw i64 %idx.0, 3
  %idx.4 = add nsw i64 %idx.0, 4
  %idx.5 = add nsw i64 %idx.0, 5
  %idx.6 = add nsw i64 %idx.0, 6
  %idx.7 = add nsw i64 %idx.0, 7

  %t53 = getelementptr inbounds i16, i16* %p.1, i64 %idx.0
  %op1.l = load i16, i16* %t53, align 2
  %op1.ext = zext i16 %op1.l to i64
  %t56 = getelementptr inbounds i16, i16* %p.2, i64 %idx.0
  %op2.l = load i16, i16* %t56, align 2
  %op2.ext = zext i16 %op2.l to i64
  %sub.1 = sub nsw i64 %op1.ext, %op2.ext

  %t60 = getelementptr inbounds i32, i32* %p.0, i64 %sub.1
  %l.1 = load i32, i32* %t60, align 4

  %t64 = getelementptr inbounds i16, i16* %p.1, i64 %idx.1
  %t65 = load i16, i16* %t64, align 2
  %t66 = zext i16 %t65 to i64
  %t67 = getelementptr inbounds i16, i16* %p.2, i64 %idx.1
  %t68 = load i16, i16* %t67, align 2
  %t69 = zext i16 %t68 to i64
  %sub.2 = sub nsw i64 %t66, %t69

  %t71 = getelementptr inbounds i32, i32* %p.0, i64 %sub.2
  %l.2 = load i32, i32* %t71, align 4

  %t75 = getelementptr inbounds i16, i16* %p.1, i64 %idx.2
  %t76 = load i16, i16* %t75, align 2
  %t77 = zext i16 %t76 to i64
  %t78 = getelementptr inbounds i16, i16* %p.2, i64 %idx.2
  %t79 = load i16, i16* %t78, align 2
  %t80 = zext i16 %t79 to i64
  %sub.3 = sub nsw i64 %t77, %t80

  %t82 = getelementptr inbounds i32, i32* %p.0, i64 %sub.3
  %l.3 = load i32, i32* %t82, align 4

  %t86 = getelementptr inbounds i16, i16* %p.1, i64 %idx.3
  %t87 = load i16, i16* %t86, align 2
  %t88 = zext i16 %t87 to i64
  %t89 = getelementptr inbounds i16, i16* %p.2, i64 %idx.3
  %t90 = load i16, i16* %t89, align 2
  %t91 = zext i16 %t90 to i64
  %sub.4 = sub nsw i64 %t88, %t91

  %t93 = getelementptr inbounds i32, i32* %p.0, i64 %sub.4
  %l.4 = load i32, i32* %t93, align 4

  %t97 = getelementptr inbounds i16, i16* %p.1, i64 %idx.4
  %t98 = load i16, i16* %t97, align 2
  %t99 = zext i16 %t98 to i64
  %t100 = getelementptr inbounds i16, i16* %p.2, i64 %idx.4
  %t101 = load i16, i16* %t100, align 2
  %t102 = zext i16 %t101 to i64
  %sub.5 = sub nsw i64 %t99, %t102

  %t104 = getelementptr inbounds i32, i32* %p.0, i64 %sub.5
  %l.5 = load i32, i32* %t104, align 4

  %t108 = getelementptr inbounds i16, i16* %p.1, i64 %idx.5
  %t109 = load i16, i16* %t108, align 2
  %t110 = zext i16 %t109 to i64
  %t111 = getelementptr inbounds i16, i16* %p.2, i64 %idx.5
  %t112 = load i16, i16* %t111, align 2
  %t113 = zext i16 %t112 to i64
  %sub.6 = sub nsw i64 %t110, %t113

  %t115 = getelementptr inbounds i32, i32* %p.0, i64 %sub.6
  %l.6 = load i32, i32* %t115, align 4

  %t119 = getelementptr inbounds i16, i16* %p.1, i64 %idx.6
  %t120 = load i16, i16* %t119, align 2
  %t121 = zext i16 %t120 to i64
  %t122 = getelementptr inbounds i16, i16* %p.2, i64 %idx.6
  %t123 = load i16, i16* %t122, align 2
  %t124 = zext i16 %t123 to i64
  %sub.7 = sub nsw i64 %t121, %t124

  %t126 = getelementptr inbounds i32, i32* %p.0, i64 %sub.7
  %l.7 = load i32, i32* %t126, align 4

  %t130 = getelementptr inbounds i16, i16* %p.1, i64 %idx.7
  %t131 = load i16, i16* %t130, align 2
  %t132 = zext i16 %t131 to i64
  %t133 = getelementptr inbounds i16, i16* %p.2, i64 %idx.7
  %t134 = load i16, i16* %t133, align 2
  %t135 = zext i16 %t134 to i64
  %sub.8 = sub nsw i64 %t132, %t135

  %t137 = getelementptr inbounds i32, i32* %p.0, i64 %sub.8
  %l.8 = load i32, i32* %t137, align 4

  call void @use(i32 %l.1, i32 %l.2, i32 %l.3, i32 %l.4, i32 %l.5, i32 %l.6, i32 %l.7, i32 %l.8)
  ret void
}

declare void @use(i32, i32, i32, i32, i32, i32, i32, i32)