1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; RUN: opt < %s -mcpu=corei7 -passes='default<O1>' -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O1 3; RUN: opt < %s -mcpu=corei7 -passes='default<O2>' -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O2 4; RUN: opt < %s -mcpu=corei7 -passes='default<O3>' -S -unroll-threshold=150 -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3 5; RUN: opt < %s -mcpu=corei7 -passes='default<O3>' -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3DEFAULT 6; RUN: opt < %s -mcpu=corei7 -passes='default<Os>' -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=Os 7; RUN: opt < %s -mcpu=corei7 -passes='default<Oz>' -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=Oz 8; RUN: opt < %s -mcpu=corei7 -passes='default<O1>,loop-vectorize' -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O1VEC2 9; RUN: opt < %s -mcpu=corei7 -passes='default<Oz>,loop-vectorize' -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=OzVEC2 10; RUN: opt < %s -mcpu=corei7 -passes='default<O3>' -unroll-threshold=150 -vectorize-loops=false -S -unroll-allow-partial=0 | FileCheck %s --check-prefix=O3DIS 11 12; This file tests the llvm.loop.vectorize.enable metadata forcing 13; vectorization even when optimization levels are too low, or when 14; vectorization is disabled. 15 16target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" 17target triple = "x86_64-unknown-linux-gnu" 18 19define i32 @enabled(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %N) { 20; O1-LABEL: @enabled( 21; O1-NEXT: entry: 22; O1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0 23; O1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer 24; O1-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* 25; O1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 26; O1-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] 27; O1-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* 28; O1-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 29; O1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4 30; O1-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* 31; O1-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 32; O1-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]] 33; O1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4 34; O1-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* 35; O1-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4 36; O1-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8 37; O1-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* 38; O1-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 39; O1-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]] 40; O1-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8 41; O1-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* 42; O1-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4 43; O1-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12 44; O1-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* 45; O1-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4 46; O1-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]] 47; O1-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12 48; O1-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>* 49; O1-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4 50; O1-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16 51; O1-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* 52; O1-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4 53; O1-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]] 54; O1-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16 55; O1-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>* 56; O1-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4 57; O1-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20 58; O1-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>* 59; O1-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4 60; O1-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]] 61; O1-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20 62; O1-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>* 63; O1-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4 64; O1-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24 65; O1-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>* 66; O1-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4 67; O1-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]] 68; O1-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24 69; O1-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>* 70; O1-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4 71; O1-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28 72; O1-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>* 73; O1-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4 74; O1-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]] 75; O1-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28 76; O1-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>* 77; O1-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4 78; O1-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32 79; O1-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>* 80; O1-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4 81; O1-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]] 82; O1-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32 83; O1-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>* 84; O1-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4 85; O1-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36 86; O1-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>* 87; O1-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4 88; O1-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]] 89; O1-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36 90; O1-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>* 91; O1-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4 92; O1-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40 93; O1-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>* 94; O1-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4 95; O1-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]] 96; O1-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40 97; O1-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>* 98; O1-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4 99; O1-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44 100; O1-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>* 101; O1-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4 102; O1-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]] 103; O1-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44 104; O1-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>* 105; O1-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4 106; O1-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48 107; O1-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>* 108; O1-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4 109; O1-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]] 110; O1-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48 111; O1-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>* 112; O1-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4 113; O1-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52 114; O1-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>* 115; O1-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4 116; O1-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]] 117; O1-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52 118; O1-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>* 119; O1-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4 120; O1-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56 121; O1-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>* 122; O1-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4 123; O1-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]] 124; O1-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56 125; O1-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>* 126; O1-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4 127; O1-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60 128; O1-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>* 129; O1-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4 130; O1-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]] 131; O1-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60 132; O1-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>* 133; O1-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4 134; O1-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4 135; O1-NEXT: ret i32 [[TMP78]] 136; 137; O2-LABEL: @enabled( 138; O2-NEXT: entry: 139; O2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0 140; O2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer 141; O2-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* 142; O2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 143; O2-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] 144; O2-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* 145; O2-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 146; O2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4 147; O2-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* 148; O2-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 149; O2-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]] 150; O2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4 151; O2-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* 152; O2-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4 153; O2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8 154; O2-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* 155; O2-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 156; O2-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]] 157; O2-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8 158; O2-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* 159; O2-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4 160; O2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12 161; O2-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* 162; O2-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4 163; O2-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]] 164; O2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12 165; O2-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>* 166; O2-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4 167; O2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16 168; O2-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* 169; O2-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4 170; O2-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]] 171; O2-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16 172; O2-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>* 173; O2-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4 174; O2-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20 175; O2-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>* 176; O2-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4 177; O2-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]] 178; O2-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20 179; O2-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>* 180; O2-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4 181; O2-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24 182; O2-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>* 183; O2-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4 184; O2-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]] 185; O2-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24 186; O2-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>* 187; O2-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4 188; O2-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28 189; O2-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>* 190; O2-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4 191; O2-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]] 192; O2-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28 193; O2-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>* 194; O2-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4 195; O2-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32 196; O2-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>* 197; O2-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4 198; O2-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]] 199; O2-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32 200; O2-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>* 201; O2-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4 202; O2-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36 203; O2-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>* 204; O2-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4 205; O2-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]] 206; O2-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36 207; O2-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>* 208; O2-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4 209; O2-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40 210; O2-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>* 211; O2-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4 212; O2-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]] 213; O2-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40 214; O2-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>* 215; O2-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4 216; O2-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44 217; O2-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>* 218; O2-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4 219; O2-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]] 220; O2-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44 221; O2-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>* 222; O2-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4 223; O2-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48 224; O2-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>* 225; O2-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4 226; O2-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]] 227; O2-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48 228; O2-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>* 229; O2-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4 230; O2-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52 231; O2-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>* 232; O2-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4 233; O2-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]] 234; O2-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52 235; O2-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>* 236; O2-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4 237; O2-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56 238; O2-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>* 239; O2-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4 240; O2-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]] 241; O2-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56 242; O2-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>* 243; O2-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4 244; O2-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60 245; O2-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>* 246; O2-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4 247; O2-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]] 248; O2-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60 249; O2-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>* 250; O2-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4 251; O2-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4 252; O2-NEXT: ret i32 [[TMP78]] 253; 254; O3-LABEL: @enabled( 255; O3-NEXT: entry: 256; O3-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0 257; O3-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer 258; O3-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* 259; O3-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 260; O3-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] 261; O3-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* 262; O3-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 263; O3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4 264; O3-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* 265; O3-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 266; O3-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]] 267; O3-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4 268; O3-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* 269; O3-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4 270; O3-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8 271; O3-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* 272; O3-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 273; O3-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]] 274; O3-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8 275; O3-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* 276; O3-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4 277; O3-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12 278; O3-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* 279; O3-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4 280; O3-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]] 281; O3-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12 282; O3-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>* 283; O3-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4 284; O3-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16 285; O3-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* 286; O3-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4 287; O3-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]] 288; O3-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16 289; O3-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>* 290; O3-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4 291; O3-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20 292; O3-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>* 293; O3-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4 294; O3-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]] 295; O3-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20 296; O3-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>* 297; O3-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4 298; O3-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24 299; O3-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>* 300; O3-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4 301; O3-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]] 302; O3-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24 303; O3-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>* 304; O3-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4 305; O3-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28 306; O3-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>* 307; O3-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4 308; O3-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]] 309; O3-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28 310; O3-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>* 311; O3-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4 312; O3-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32 313; O3-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>* 314; O3-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4 315; O3-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]] 316; O3-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32 317; O3-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>* 318; O3-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4 319; O3-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36 320; O3-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>* 321; O3-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4 322; O3-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]] 323; O3-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36 324; O3-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>* 325; O3-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4 326; O3-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40 327; O3-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>* 328; O3-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4 329; O3-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]] 330; O3-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40 331; O3-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>* 332; O3-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4 333; O3-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44 334; O3-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>* 335; O3-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4 336; O3-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]] 337; O3-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44 338; O3-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>* 339; O3-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4 340; O3-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48 341; O3-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>* 342; O3-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4 343; O3-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]] 344; O3-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48 345; O3-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>* 346; O3-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4 347; O3-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52 348; O3-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>* 349; O3-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4 350; O3-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]] 351; O3-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52 352; O3-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>* 353; O3-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4 354; O3-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56 355; O3-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>* 356; O3-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4 357; O3-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]] 358; O3-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56 359; O3-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>* 360; O3-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4 361; O3-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60 362; O3-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>* 363; O3-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4 364; O3-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]] 365; O3-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60 366; O3-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>* 367; O3-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4 368; O3-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4 369; O3-NEXT: ret i32 [[TMP78]] 370; 371; O3DEFAULT-LABEL: @enabled( 372; O3DEFAULT-NEXT: entry: 373; O3DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0 374; O3DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer 375; O3DEFAULT-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* 376; O3DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 377; O3DEFAULT-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] 378; O3DEFAULT-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* 379; O3DEFAULT-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 380; O3DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4 381; O3DEFAULT-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* 382; O3DEFAULT-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 383; O3DEFAULT-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]] 384; O3DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4 385; O3DEFAULT-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* 386; O3DEFAULT-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4 387; O3DEFAULT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8 388; O3DEFAULT-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* 389; O3DEFAULT-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 390; O3DEFAULT-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]] 391; O3DEFAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8 392; O3DEFAULT-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* 393; O3DEFAULT-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4 394; O3DEFAULT-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12 395; O3DEFAULT-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* 396; O3DEFAULT-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4 397; O3DEFAULT-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]] 398; O3DEFAULT-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12 399; O3DEFAULT-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>* 400; O3DEFAULT-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4 401; O3DEFAULT-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16 402; O3DEFAULT-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* 403; O3DEFAULT-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4 404; O3DEFAULT-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]] 405; O3DEFAULT-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16 406; O3DEFAULT-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>* 407; O3DEFAULT-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4 408; O3DEFAULT-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20 409; O3DEFAULT-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>* 410; O3DEFAULT-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4 411; O3DEFAULT-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]] 412; O3DEFAULT-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20 413; O3DEFAULT-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>* 414; O3DEFAULT-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4 415; O3DEFAULT-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24 416; O3DEFAULT-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>* 417; O3DEFAULT-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4 418; O3DEFAULT-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]] 419; O3DEFAULT-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24 420; O3DEFAULT-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>* 421; O3DEFAULT-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4 422; O3DEFAULT-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28 423; O3DEFAULT-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>* 424; O3DEFAULT-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4 425; O3DEFAULT-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]] 426; O3DEFAULT-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28 427; O3DEFAULT-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>* 428; O3DEFAULT-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4 429; O3DEFAULT-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32 430; O3DEFAULT-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>* 431; O3DEFAULT-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4 432; O3DEFAULT-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]] 433; O3DEFAULT-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32 434; O3DEFAULT-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>* 435; O3DEFAULT-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4 436; O3DEFAULT-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36 437; O3DEFAULT-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>* 438; O3DEFAULT-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4 439; O3DEFAULT-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]] 440; O3DEFAULT-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36 441; O3DEFAULT-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>* 442; O3DEFAULT-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4 443; O3DEFAULT-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40 444; O3DEFAULT-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>* 445; O3DEFAULT-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4 446; O3DEFAULT-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]] 447; O3DEFAULT-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40 448; O3DEFAULT-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>* 449; O3DEFAULT-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4 450; O3DEFAULT-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44 451; O3DEFAULT-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>* 452; O3DEFAULT-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4 453; O3DEFAULT-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]] 454; O3DEFAULT-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44 455; O3DEFAULT-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>* 456; O3DEFAULT-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4 457; O3DEFAULT-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48 458; O3DEFAULT-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>* 459; O3DEFAULT-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4 460; O3DEFAULT-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]] 461; O3DEFAULT-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48 462; O3DEFAULT-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>* 463; O3DEFAULT-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4 464; O3DEFAULT-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52 465; O3DEFAULT-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>* 466; O3DEFAULT-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4 467; O3DEFAULT-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]] 468; O3DEFAULT-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52 469; O3DEFAULT-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>* 470; O3DEFAULT-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4 471; O3DEFAULT-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56 472; O3DEFAULT-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>* 473; O3DEFAULT-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4 474; O3DEFAULT-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]] 475; O3DEFAULT-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56 476; O3DEFAULT-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>* 477; O3DEFAULT-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4 478; O3DEFAULT-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60 479; O3DEFAULT-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>* 480; O3DEFAULT-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4 481; O3DEFAULT-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]] 482; O3DEFAULT-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60 483; O3DEFAULT-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>* 484; O3DEFAULT-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4 485; O3DEFAULT-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4 486; O3DEFAULT-NEXT: ret i32 [[TMP78]] 487; 488; Os-LABEL: @enabled( 489; Os-NEXT: entry: 490; Os-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0 491; Os-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer 492; Os-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* 493; Os-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 494; Os-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] 495; Os-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* 496; Os-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 497; Os-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4 498; Os-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* 499; Os-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 500; Os-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]] 501; Os-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4 502; Os-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* 503; Os-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4 504; Os-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8 505; Os-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* 506; Os-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 507; Os-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]] 508; Os-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8 509; Os-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* 510; Os-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4 511; Os-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12 512; Os-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* 513; Os-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4 514; Os-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]] 515; Os-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12 516; Os-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>* 517; Os-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4 518; Os-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16 519; Os-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* 520; Os-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4 521; Os-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]] 522; Os-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16 523; Os-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>* 524; Os-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4 525; Os-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20 526; Os-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>* 527; Os-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4 528; Os-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]] 529; Os-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20 530; Os-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>* 531; Os-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4 532; Os-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24 533; Os-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>* 534; Os-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4 535; Os-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]] 536; Os-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24 537; Os-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>* 538; Os-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4 539; Os-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28 540; Os-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>* 541; Os-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4 542; Os-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]] 543; Os-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28 544; Os-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>* 545; Os-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4 546; Os-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32 547; Os-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>* 548; Os-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4 549; Os-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]] 550; Os-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32 551; Os-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>* 552; Os-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4 553; Os-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36 554; Os-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>* 555; Os-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4 556; Os-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]] 557; Os-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36 558; Os-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>* 559; Os-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4 560; Os-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40 561; Os-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>* 562; Os-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4 563; Os-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]] 564; Os-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40 565; Os-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>* 566; Os-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4 567; Os-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44 568; Os-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>* 569; Os-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4 570; Os-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]] 571; Os-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44 572; Os-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>* 573; Os-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4 574; Os-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48 575; Os-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>* 576; Os-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4 577; Os-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]] 578; Os-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48 579; Os-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>* 580; Os-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4 581; Os-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52 582; Os-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>* 583; Os-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4 584; Os-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]] 585; Os-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52 586; Os-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>* 587; Os-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4 588; Os-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56 589; Os-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>* 590; Os-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4 591; Os-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]] 592; Os-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56 593; Os-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>* 594; Os-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4 595; Os-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60 596; Os-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>* 597; Os-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4 598; Os-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]] 599; Os-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60 600; Os-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>* 601; Os-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4 602; Os-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4 603; Os-NEXT: ret i32 [[TMP78]] 604; 605; Oz-LABEL: @enabled( 606; Oz-NEXT: entry: 607; Oz-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0 608; Oz-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer 609; Oz-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* 610; Oz-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 611; Oz-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] 612; Oz-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* 613; Oz-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 614; Oz-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4 615; Oz-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* 616; Oz-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 617; Oz-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]] 618; Oz-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4 619; Oz-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* 620; Oz-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4 621; Oz-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8 622; Oz-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* 623; Oz-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 624; Oz-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]] 625; Oz-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8 626; Oz-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* 627; Oz-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4 628; Oz-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12 629; Oz-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* 630; Oz-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4 631; Oz-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]] 632; Oz-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12 633; Oz-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>* 634; Oz-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4 635; Oz-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16 636; Oz-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* 637; Oz-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4 638; Oz-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]] 639; Oz-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16 640; Oz-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>* 641; Oz-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4 642; Oz-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20 643; Oz-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>* 644; Oz-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4 645; Oz-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]] 646; Oz-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20 647; Oz-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>* 648; Oz-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4 649; Oz-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24 650; Oz-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>* 651; Oz-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4 652; Oz-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]] 653; Oz-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24 654; Oz-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>* 655; Oz-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4 656; Oz-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28 657; Oz-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>* 658; Oz-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4 659; Oz-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]] 660; Oz-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28 661; Oz-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>* 662; Oz-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4 663; Oz-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32 664; Oz-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>* 665; Oz-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4 666; Oz-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]] 667; Oz-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32 668; Oz-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>* 669; Oz-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4 670; Oz-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36 671; Oz-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>* 672; Oz-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4 673; Oz-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]] 674; Oz-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36 675; Oz-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>* 676; Oz-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4 677; Oz-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40 678; Oz-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>* 679; Oz-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4 680; Oz-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]] 681; Oz-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40 682; Oz-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>* 683; Oz-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4 684; Oz-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44 685; Oz-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>* 686; Oz-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4 687; Oz-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]] 688; Oz-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44 689; Oz-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>* 690; Oz-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4 691; Oz-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48 692; Oz-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>* 693; Oz-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4 694; Oz-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]] 695; Oz-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48 696; Oz-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>* 697; Oz-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4 698; Oz-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52 699; Oz-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>* 700; Oz-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4 701; Oz-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]] 702; Oz-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52 703; Oz-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>* 704; Oz-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4 705; Oz-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56 706; Oz-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>* 707; Oz-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4 708; Oz-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]] 709; Oz-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56 710; Oz-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>* 711; Oz-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4 712; Oz-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60 713; Oz-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>* 714; Oz-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4 715; Oz-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]] 716; Oz-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60 717; Oz-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>* 718; Oz-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4 719; Oz-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4 720; Oz-NEXT: ret i32 [[TMP78]] 721; 722; O1VEC2-LABEL: @enabled( 723; O1VEC2-NEXT: entry: 724; O1VEC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0 725; O1VEC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer 726; O1VEC2-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* 727; O1VEC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 728; O1VEC2-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] 729; O1VEC2-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* 730; O1VEC2-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 731; O1VEC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4 732; O1VEC2-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* 733; O1VEC2-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 734; O1VEC2-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]] 735; O1VEC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4 736; O1VEC2-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* 737; O1VEC2-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4 738; O1VEC2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8 739; O1VEC2-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* 740; O1VEC2-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 741; O1VEC2-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]] 742; O1VEC2-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8 743; O1VEC2-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* 744; O1VEC2-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4 745; O1VEC2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12 746; O1VEC2-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* 747; O1VEC2-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4 748; O1VEC2-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]] 749; O1VEC2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12 750; O1VEC2-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>* 751; O1VEC2-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4 752; O1VEC2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16 753; O1VEC2-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* 754; O1VEC2-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4 755; O1VEC2-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]] 756; O1VEC2-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16 757; O1VEC2-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>* 758; O1VEC2-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4 759; O1VEC2-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20 760; O1VEC2-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>* 761; O1VEC2-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4 762; O1VEC2-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]] 763; O1VEC2-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20 764; O1VEC2-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>* 765; O1VEC2-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4 766; O1VEC2-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24 767; O1VEC2-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>* 768; O1VEC2-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4 769; O1VEC2-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]] 770; O1VEC2-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24 771; O1VEC2-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>* 772; O1VEC2-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4 773; O1VEC2-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28 774; O1VEC2-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>* 775; O1VEC2-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4 776; O1VEC2-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]] 777; O1VEC2-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28 778; O1VEC2-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>* 779; O1VEC2-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4 780; O1VEC2-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32 781; O1VEC2-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>* 782; O1VEC2-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4 783; O1VEC2-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]] 784; O1VEC2-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32 785; O1VEC2-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>* 786; O1VEC2-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4 787; O1VEC2-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36 788; O1VEC2-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>* 789; O1VEC2-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4 790; O1VEC2-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]] 791; O1VEC2-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36 792; O1VEC2-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>* 793; O1VEC2-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4 794; O1VEC2-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40 795; O1VEC2-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>* 796; O1VEC2-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4 797; O1VEC2-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]] 798; O1VEC2-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40 799; O1VEC2-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>* 800; O1VEC2-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4 801; O1VEC2-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44 802; O1VEC2-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>* 803; O1VEC2-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4 804; O1VEC2-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]] 805; O1VEC2-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44 806; O1VEC2-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>* 807; O1VEC2-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4 808; O1VEC2-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48 809; O1VEC2-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>* 810; O1VEC2-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4 811; O1VEC2-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]] 812; O1VEC2-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48 813; O1VEC2-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>* 814; O1VEC2-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4 815; O1VEC2-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52 816; O1VEC2-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>* 817; O1VEC2-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4 818; O1VEC2-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]] 819; O1VEC2-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52 820; O1VEC2-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>* 821; O1VEC2-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4 822; O1VEC2-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56 823; O1VEC2-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>* 824; O1VEC2-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4 825; O1VEC2-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]] 826; O1VEC2-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56 827; O1VEC2-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>* 828; O1VEC2-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4 829; O1VEC2-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60 830; O1VEC2-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>* 831; O1VEC2-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4 832; O1VEC2-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]] 833; O1VEC2-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60 834; O1VEC2-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>* 835; O1VEC2-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4 836; O1VEC2-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4 837; O1VEC2-NEXT: ret i32 [[TMP78]] 838; 839; OzVEC2-LABEL: @enabled( 840; OzVEC2-NEXT: entry: 841; OzVEC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0 842; OzVEC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer 843; OzVEC2-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* 844; OzVEC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 845; OzVEC2-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] 846; OzVEC2-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* 847; OzVEC2-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 848; OzVEC2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4 849; OzVEC2-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* 850; OzVEC2-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 851; OzVEC2-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]] 852; OzVEC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4 853; OzVEC2-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* 854; OzVEC2-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4 855; OzVEC2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8 856; OzVEC2-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* 857; OzVEC2-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 858; OzVEC2-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]] 859; OzVEC2-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8 860; OzVEC2-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* 861; OzVEC2-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4 862; OzVEC2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12 863; OzVEC2-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* 864; OzVEC2-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4 865; OzVEC2-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]] 866; OzVEC2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12 867; OzVEC2-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>* 868; OzVEC2-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4 869; OzVEC2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16 870; OzVEC2-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* 871; OzVEC2-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4 872; OzVEC2-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]] 873; OzVEC2-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16 874; OzVEC2-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>* 875; OzVEC2-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4 876; OzVEC2-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20 877; OzVEC2-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>* 878; OzVEC2-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4 879; OzVEC2-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]] 880; OzVEC2-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20 881; OzVEC2-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>* 882; OzVEC2-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4 883; OzVEC2-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24 884; OzVEC2-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>* 885; OzVEC2-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4 886; OzVEC2-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]] 887; OzVEC2-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24 888; OzVEC2-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>* 889; OzVEC2-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4 890; OzVEC2-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28 891; OzVEC2-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>* 892; OzVEC2-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4 893; OzVEC2-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]] 894; OzVEC2-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28 895; OzVEC2-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>* 896; OzVEC2-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4 897; OzVEC2-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32 898; OzVEC2-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>* 899; OzVEC2-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4 900; OzVEC2-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]] 901; OzVEC2-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32 902; OzVEC2-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>* 903; OzVEC2-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4 904; OzVEC2-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36 905; OzVEC2-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>* 906; OzVEC2-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4 907; OzVEC2-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]] 908; OzVEC2-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36 909; OzVEC2-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>* 910; OzVEC2-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4 911; OzVEC2-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40 912; OzVEC2-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>* 913; OzVEC2-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4 914; OzVEC2-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]] 915; OzVEC2-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40 916; OzVEC2-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>* 917; OzVEC2-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4 918; OzVEC2-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44 919; OzVEC2-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>* 920; OzVEC2-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4 921; OzVEC2-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]] 922; OzVEC2-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44 923; OzVEC2-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>* 924; OzVEC2-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4 925; OzVEC2-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48 926; OzVEC2-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>* 927; OzVEC2-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4 928; OzVEC2-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]] 929; OzVEC2-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48 930; OzVEC2-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>* 931; OzVEC2-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4 932; OzVEC2-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52 933; OzVEC2-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>* 934; OzVEC2-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4 935; OzVEC2-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]] 936; OzVEC2-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52 937; OzVEC2-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>* 938; OzVEC2-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4 939; OzVEC2-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56 940; OzVEC2-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>* 941; OzVEC2-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4 942; OzVEC2-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]] 943; OzVEC2-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56 944; OzVEC2-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>* 945; OzVEC2-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4 946; OzVEC2-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60 947; OzVEC2-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>* 948; OzVEC2-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4 949; OzVEC2-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]] 950; OzVEC2-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60 951; OzVEC2-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>* 952; OzVEC2-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4 953; OzVEC2-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4 954; OzVEC2-NEXT: ret i32 [[TMP78]] 955; 956; O3DIS-LABEL: @enabled( 957; O3DIS-NEXT: entry: 958; O3DIS-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0 959; O3DIS-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer 960; O3DIS-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* 961; O3DIS-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 962; O3DIS-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] 963; O3DIS-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* 964; O3DIS-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 965; O3DIS-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4 966; O3DIS-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* 967; O3DIS-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 968; O3DIS-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]] 969; O3DIS-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4 970; O3DIS-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* 971; O3DIS-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4 972; O3DIS-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8 973; O3DIS-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* 974; O3DIS-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 975; O3DIS-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]] 976; O3DIS-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8 977; O3DIS-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* 978; O3DIS-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4 979; O3DIS-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12 980; O3DIS-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* 981; O3DIS-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4 982; O3DIS-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]] 983; O3DIS-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12 984; O3DIS-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>* 985; O3DIS-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4 986; O3DIS-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16 987; O3DIS-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* 988; O3DIS-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4 989; O3DIS-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]] 990; O3DIS-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16 991; O3DIS-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>* 992; O3DIS-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4 993; O3DIS-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20 994; O3DIS-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>* 995; O3DIS-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4 996; O3DIS-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]] 997; O3DIS-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20 998; O3DIS-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>* 999; O3DIS-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4 1000; O3DIS-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24 1001; O3DIS-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>* 1002; O3DIS-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4 1003; O3DIS-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]] 1004; O3DIS-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24 1005; O3DIS-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>* 1006; O3DIS-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4 1007; O3DIS-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28 1008; O3DIS-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>* 1009; O3DIS-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4 1010; O3DIS-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]] 1011; O3DIS-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28 1012; O3DIS-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>* 1013; O3DIS-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4 1014; O3DIS-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32 1015; O3DIS-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>* 1016; O3DIS-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4 1017; O3DIS-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]] 1018; O3DIS-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32 1019; O3DIS-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>* 1020; O3DIS-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4 1021; O3DIS-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36 1022; O3DIS-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>* 1023; O3DIS-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4 1024; O3DIS-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]] 1025; O3DIS-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36 1026; O3DIS-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>* 1027; O3DIS-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4 1028; O3DIS-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40 1029; O3DIS-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>* 1030; O3DIS-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4 1031; O3DIS-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]] 1032; O3DIS-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40 1033; O3DIS-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>* 1034; O3DIS-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4 1035; O3DIS-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44 1036; O3DIS-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>* 1037; O3DIS-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4 1038; O3DIS-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]] 1039; O3DIS-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44 1040; O3DIS-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>* 1041; O3DIS-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4 1042; O3DIS-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48 1043; O3DIS-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>* 1044; O3DIS-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4 1045; O3DIS-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]] 1046; O3DIS-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48 1047; O3DIS-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>* 1048; O3DIS-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4 1049; O3DIS-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52 1050; O3DIS-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>* 1051; O3DIS-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4 1052; O3DIS-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]] 1053; O3DIS-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52 1054; O3DIS-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>* 1055; O3DIS-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4 1056; O3DIS-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56 1057; O3DIS-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>* 1058; O3DIS-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4 1059; O3DIS-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]] 1060; O3DIS-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56 1061; O3DIS-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>* 1062; O3DIS-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4 1063; O3DIS-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60 1064; O3DIS-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>* 1065; O3DIS-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4 1066; O3DIS-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]] 1067; O3DIS-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60 1068; O3DIS-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>* 1069; O3DIS-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4 1070; O3DIS-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4 1071; O3DIS-NEXT: ret i32 [[TMP78]] 1072; 1073entry: 1074 br label %for.body 1075 1076for.body: ; preds = %for.body, %entry 1077 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] 1078 %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv 1079 %0 = load i32, i32* %arrayidx, align 4 1080 %add = add nsw i32 %0, %N 1081 %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv 1082 store i32 %add, i32* %arrayidx2, align 4 1083 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 1084 %exitcond = icmp eq i64 %indvars.iv.next, 64 1085 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0 1086 1087for.end: ; preds = %for.body 1088 %1 = load i32, i32* %a, align 4 1089 ret i32 %1 1090} 1091 1092define i32 @nopragma(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %N) { 1093; O1-LABEL: @nopragma( 1094; O1-NEXT: entry: 1095; O1-NEXT: br label [[FOR_BODY:%.*]] 1096; O1: for.body: 1097; O1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 1098; O1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]] 1099; O1-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 1100; O1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]] 1101; O1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] 1102; O1-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 1103; O1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1104; O1-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64 1105; O1-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] 1106; O1: for.end: 1107; O1-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4 1108; O1-NEXT: ret i32 [[TMP1]] 1109; 1110; O2-LABEL: @nopragma( 1111; O2-NEXT: entry: 1112; O2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0 1113; O2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer 1114; O2-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* 1115; O2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 1116; O2-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] 1117; O2-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* 1118; O2-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 1119; O2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4 1120; O2-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* 1121; O2-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 1122; O2-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]] 1123; O2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4 1124; O2-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* 1125; O2-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4 1126; O2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8 1127; O2-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* 1128; O2-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 1129; O2-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]] 1130; O2-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8 1131; O2-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* 1132; O2-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4 1133; O2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12 1134; O2-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* 1135; O2-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4 1136; O2-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]] 1137; O2-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12 1138; O2-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>* 1139; O2-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4 1140; O2-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16 1141; O2-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* 1142; O2-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4 1143; O2-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]] 1144; O2-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16 1145; O2-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>* 1146; O2-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4 1147; O2-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20 1148; O2-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>* 1149; O2-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4 1150; O2-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]] 1151; O2-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20 1152; O2-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>* 1153; O2-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4 1154; O2-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24 1155; O2-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>* 1156; O2-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4 1157; O2-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]] 1158; O2-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24 1159; O2-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>* 1160; O2-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4 1161; O2-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28 1162; O2-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>* 1163; O2-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4 1164; O2-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]] 1165; O2-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28 1166; O2-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>* 1167; O2-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4 1168; O2-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32 1169; O2-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>* 1170; O2-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4 1171; O2-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]] 1172; O2-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32 1173; O2-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>* 1174; O2-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4 1175; O2-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36 1176; O2-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>* 1177; O2-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4 1178; O2-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]] 1179; O2-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36 1180; O2-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>* 1181; O2-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4 1182; O2-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40 1183; O2-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>* 1184; O2-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4 1185; O2-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]] 1186; O2-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40 1187; O2-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>* 1188; O2-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4 1189; O2-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44 1190; O2-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>* 1191; O2-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4 1192; O2-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]] 1193; O2-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44 1194; O2-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>* 1195; O2-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4 1196; O2-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48 1197; O2-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>* 1198; O2-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4 1199; O2-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]] 1200; O2-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48 1201; O2-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>* 1202; O2-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4 1203; O2-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52 1204; O2-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>* 1205; O2-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4 1206; O2-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]] 1207; O2-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52 1208; O2-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>* 1209; O2-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4 1210; O2-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56 1211; O2-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>* 1212; O2-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4 1213; O2-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]] 1214; O2-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56 1215; O2-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>* 1216; O2-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4 1217; O2-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60 1218; O2-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>* 1219; O2-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4 1220; O2-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]] 1221; O2-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60 1222; O2-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>* 1223; O2-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4 1224; O2-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4 1225; O2-NEXT: ret i32 [[TMP78]] 1226; 1227; O3-LABEL: @nopragma( 1228; O3-NEXT: entry: 1229; O3-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0 1230; O3-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer 1231; O3-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* 1232; O3-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 1233; O3-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] 1234; O3-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* 1235; O3-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 1236; O3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4 1237; O3-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* 1238; O3-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 1239; O3-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]] 1240; O3-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4 1241; O3-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* 1242; O3-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4 1243; O3-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8 1244; O3-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* 1245; O3-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 1246; O3-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]] 1247; O3-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8 1248; O3-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* 1249; O3-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4 1250; O3-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12 1251; O3-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* 1252; O3-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4 1253; O3-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]] 1254; O3-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12 1255; O3-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>* 1256; O3-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4 1257; O3-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16 1258; O3-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* 1259; O3-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4 1260; O3-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]] 1261; O3-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16 1262; O3-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>* 1263; O3-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4 1264; O3-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20 1265; O3-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>* 1266; O3-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4 1267; O3-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]] 1268; O3-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20 1269; O3-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>* 1270; O3-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4 1271; O3-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24 1272; O3-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>* 1273; O3-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4 1274; O3-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]] 1275; O3-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24 1276; O3-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>* 1277; O3-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4 1278; O3-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28 1279; O3-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>* 1280; O3-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4 1281; O3-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]] 1282; O3-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28 1283; O3-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>* 1284; O3-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4 1285; O3-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32 1286; O3-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>* 1287; O3-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4 1288; O3-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]] 1289; O3-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32 1290; O3-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>* 1291; O3-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4 1292; O3-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36 1293; O3-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>* 1294; O3-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4 1295; O3-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]] 1296; O3-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36 1297; O3-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>* 1298; O3-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4 1299; O3-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40 1300; O3-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>* 1301; O3-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4 1302; O3-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]] 1303; O3-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40 1304; O3-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>* 1305; O3-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4 1306; O3-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44 1307; O3-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>* 1308; O3-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4 1309; O3-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]] 1310; O3-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44 1311; O3-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>* 1312; O3-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4 1313; O3-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48 1314; O3-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>* 1315; O3-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4 1316; O3-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]] 1317; O3-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48 1318; O3-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>* 1319; O3-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4 1320; O3-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52 1321; O3-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>* 1322; O3-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4 1323; O3-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]] 1324; O3-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52 1325; O3-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>* 1326; O3-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4 1327; O3-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56 1328; O3-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>* 1329; O3-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4 1330; O3-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]] 1331; O3-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56 1332; O3-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>* 1333; O3-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4 1334; O3-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60 1335; O3-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>* 1336; O3-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4 1337; O3-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]] 1338; O3-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60 1339; O3-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>* 1340; O3-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4 1341; O3-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4 1342; O3-NEXT: ret i32 [[TMP78]] 1343; 1344; O3DEFAULT-LABEL: @nopragma( 1345; O3DEFAULT-NEXT: entry: 1346; O3DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0 1347; O3DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer 1348; O3DEFAULT-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* 1349; O3DEFAULT-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 1350; O3DEFAULT-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] 1351; O3DEFAULT-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* 1352; O3DEFAULT-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 1353; O3DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4 1354; O3DEFAULT-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* 1355; O3DEFAULT-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 1356; O3DEFAULT-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]] 1357; O3DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4 1358; O3DEFAULT-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* 1359; O3DEFAULT-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4 1360; O3DEFAULT-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8 1361; O3DEFAULT-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* 1362; O3DEFAULT-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 1363; O3DEFAULT-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]] 1364; O3DEFAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8 1365; O3DEFAULT-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* 1366; O3DEFAULT-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4 1367; O3DEFAULT-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12 1368; O3DEFAULT-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* 1369; O3DEFAULT-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4 1370; O3DEFAULT-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]] 1371; O3DEFAULT-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12 1372; O3DEFAULT-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>* 1373; O3DEFAULT-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4 1374; O3DEFAULT-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16 1375; O3DEFAULT-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* 1376; O3DEFAULT-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4 1377; O3DEFAULT-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]] 1378; O3DEFAULT-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16 1379; O3DEFAULT-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>* 1380; O3DEFAULT-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4 1381; O3DEFAULT-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20 1382; O3DEFAULT-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>* 1383; O3DEFAULT-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4 1384; O3DEFAULT-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]] 1385; O3DEFAULT-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20 1386; O3DEFAULT-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>* 1387; O3DEFAULT-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4 1388; O3DEFAULT-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24 1389; O3DEFAULT-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>* 1390; O3DEFAULT-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4 1391; O3DEFAULT-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]] 1392; O3DEFAULT-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24 1393; O3DEFAULT-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>* 1394; O3DEFAULT-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4 1395; O3DEFAULT-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28 1396; O3DEFAULT-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>* 1397; O3DEFAULT-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4 1398; O3DEFAULT-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]] 1399; O3DEFAULT-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28 1400; O3DEFAULT-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>* 1401; O3DEFAULT-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4 1402; O3DEFAULT-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32 1403; O3DEFAULT-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>* 1404; O3DEFAULT-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4 1405; O3DEFAULT-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]] 1406; O3DEFAULT-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32 1407; O3DEFAULT-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>* 1408; O3DEFAULT-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4 1409; O3DEFAULT-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36 1410; O3DEFAULT-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>* 1411; O3DEFAULT-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4 1412; O3DEFAULT-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]] 1413; O3DEFAULT-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36 1414; O3DEFAULT-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>* 1415; O3DEFAULT-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4 1416; O3DEFAULT-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40 1417; O3DEFAULT-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>* 1418; O3DEFAULT-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4 1419; O3DEFAULT-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]] 1420; O3DEFAULT-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40 1421; O3DEFAULT-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>* 1422; O3DEFAULT-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4 1423; O3DEFAULT-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44 1424; O3DEFAULT-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>* 1425; O3DEFAULT-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4 1426; O3DEFAULT-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]] 1427; O3DEFAULT-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44 1428; O3DEFAULT-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>* 1429; O3DEFAULT-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4 1430; O3DEFAULT-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48 1431; O3DEFAULT-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>* 1432; O3DEFAULT-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4 1433; O3DEFAULT-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]] 1434; O3DEFAULT-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48 1435; O3DEFAULT-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>* 1436; O3DEFAULT-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4 1437; O3DEFAULT-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52 1438; O3DEFAULT-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>* 1439; O3DEFAULT-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4 1440; O3DEFAULT-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]] 1441; O3DEFAULT-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52 1442; O3DEFAULT-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>* 1443; O3DEFAULT-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4 1444; O3DEFAULT-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56 1445; O3DEFAULT-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>* 1446; O3DEFAULT-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4 1447; O3DEFAULT-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]] 1448; O3DEFAULT-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56 1449; O3DEFAULT-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>* 1450; O3DEFAULT-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4 1451; O3DEFAULT-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60 1452; O3DEFAULT-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>* 1453; O3DEFAULT-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4 1454; O3DEFAULT-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]] 1455; O3DEFAULT-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60 1456; O3DEFAULT-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>* 1457; O3DEFAULT-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4 1458; O3DEFAULT-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4 1459; O3DEFAULT-NEXT: ret i32 [[TMP78]] 1460; 1461; Os-LABEL: @nopragma( 1462; Os-NEXT: entry: 1463; Os-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0 1464; Os-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer 1465; Os-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* 1466; Os-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 1467; Os-NEXT: [[TMP1:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] 1468; Os-NEXT: [[TMP2:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* 1469; Os-NEXT: store <4 x i32> [[TMP1]], <4 x i32>* [[TMP2]], align 4 1470; Os-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4 1471; Os-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* 1472; Os-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 1473; Os-NEXT: [[TMP5:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_1]], [[BROADCAST_SPLAT]] 1474; Os-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4 1475; Os-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* 1476; Os-NEXT: store <4 x i32> [[TMP5]], <4 x i32>* [[TMP7]], align 4 1477; Os-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8 1478; Os-NEXT: [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>* 1479; Os-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 1480; Os-NEXT: [[TMP10:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_2]], [[BROADCAST_SPLAT]] 1481; Os-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8 1482; Os-NEXT: [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>* 1483; Os-NEXT: store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4 1484; Os-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12 1485; Os-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>* 1486; Os-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4 1487; Os-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_3]], [[BROADCAST_SPLAT]] 1488; Os-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12 1489; Os-NEXT: [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>* 1490; Os-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4 1491; Os-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16 1492; Os-NEXT: [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>* 1493; Os-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP19]], align 4 1494; Os-NEXT: [[TMP20:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_4]], [[BROADCAST_SPLAT]] 1495; Os-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16 1496; Os-NEXT: [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>* 1497; Os-NEXT: store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4 1498; Os-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20 1499; Os-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>* 1500; Os-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP24]], align 4 1501; Os-NEXT: [[TMP25:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_5]], [[BROADCAST_SPLAT]] 1502; Os-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20 1503; Os-NEXT: [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>* 1504; Os-NEXT: store <4 x i32> [[TMP25]], <4 x i32>* [[TMP27]], align 4 1505; Os-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24 1506; Os-NEXT: [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>* 1507; Os-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4 1508; Os-NEXT: [[TMP30:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_6]], [[BROADCAST_SPLAT]] 1509; Os-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24 1510; Os-NEXT: [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <4 x i32>* 1511; Os-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP32]], align 4 1512; Os-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28 1513; Os-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP33]] to <4 x i32>* 1514; Os-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x i32>, <4 x i32>* [[TMP34]], align 4 1515; Os-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_7]], [[BROADCAST_SPLAT]] 1516; Os-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28 1517; Os-NEXT: [[TMP37:%.*]] = bitcast i32* [[TMP36]] to <4 x i32>* 1518; Os-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP37]], align 4 1519; Os-NEXT: [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32 1520; Os-NEXT: [[TMP39:%.*]] = bitcast i32* [[TMP38]] to <4 x i32>* 1521; Os-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP39]], align 4 1522; Os-NEXT: [[TMP40:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_8]], [[BROADCAST_SPLAT]] 1523; Os-NEXT: [[TMP41:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32 1524; Os-NEXT: [[TMP42:%.*]] = bitcast i32* [[TMP41]] to <4 x i32>* 1525; Os-NEXT: store <4 x i32> [[TMP40]], <4 x i32>* [[TMP42]], align 4 1526; Os-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36 1527; Os-NEXT: [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>* 1528; Os-NEXT: [[WIDE_LOAD_9:%.*]] = load <4 x i32>, <4 x i32>* [[TMP44]], align 4 1529; Os-NEXT: [[TMP45:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_9]], [[BROADCAST_SPLAT]] 1530; Os-NEXT: [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36 1531; Os-NEXT: [[TMP47:%.*]] = bitcast i32* [[TMP46]] to <4 x i32>* 1532; Os-NEXT: store <4 x i32> [[TMP45]], <4 x i32>* [[TMP47]], align 4 1533; Os-NEXT: [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40 1534; Os-NEXT: [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>* 1535; Os-NEXT: [[WIDE_LOAD_10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP49]], align 4 1536; Os-NEXT: [[TMP50:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_10]], [[BROADCAST_SPLAT]] 1537; Os-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40 1538; Os-NEXT: [[TMP52:%.*]] = bitcast i32* [[TMP51]] to <4 x i32>* 1539; Os-NEXT: store <4 x i32> [[TMP50]], <4 x i32>* [[TMP52]], align 4 1540; Os-NEXT: [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44 1541; Os-NEXT: [[TMP54:%.*]] = bitcast i32* [[TMP53]] to <4 x i32>* 1542; Os-NEXT: [[WIDE_LOAD_11:%.*]] = load <4 x i32>, <4 x i32>* [[TMP54]], align 4 1543; Os-NEXT: [[TMP55:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_11]], [[BROADCAST_SPLAT]] 1544; Os-NEXT: [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44 1545; Os-NEXT: [[TMP57:%.*]] = bitcast i32* [[TMP56]] to <4 x i32>* 1546; Os-NEXT: store <4 x i32> [[TMP55]], <4 x i32>* [[TMP57]], align 4 1547; Os-NEXT: [[TMP58:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 48 1548; Os-NEXT: [[TMP59:%.*]] = bitcast i32* [[TMP58]] to <4 x i32>* 1549; Os-NEXT: [[WIDE_LOAD_12:%.*]] = load <4 x i32>, <4 x i32>* [[TMP59]], align 4 1550; Os-NEXT: [[TMP60:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_12]], [[BROADCAST_SPLAT]] 1551; Os-NEXT: [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 48 1552; Os-NEXT: [[TMP62:%.*]] = bitcast i32* [[TMP61]] to <4 x i32>* 1553; Os-NEXT: store <4 x i32> [[TMP60]], <4 x i32>* [[TMP62]], align 4 1554; Os-NEXT: [[TMP63:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 52 1555; Os-NEXT: [[TMP64:%.*]] = bitcast i32* [[TMP63]] to <4 x i32>* 1556; Os-NEXT: [[WIDE_LOAD_13:%.*]] = load <4 x i32>, <4 x i32>* [[TMP64]], align 4 1557; Os-NEXT: [[TMP65:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_13]], [[BROADCAST_SPLAT]] 1558; Os-NEXT: [[TMP66:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 52 1559; Os-NEXT: [[TMP67:%.*]] = bitcast i32* [[TMP66]] to <4 x i32>* 1560; Os-NEXT: store <4 x i32> [[TMP65]], <4 x i32>* [[TMP67]], align 4 1561; Os-NEXT: [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 56 1562; Os-NEXT: [[TMP69:%.*]] = bitcast i32* [[TMP68]] to <4 x i32>* 1563; Os-NEXT: [[WIDE_LOAD_14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP69]], align 4 1564; Os-NEXT: [[TMP70:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_14]], [[BROADCAST_SPLAT]] 1565; Os-NEXT: [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 56 1566; Os-NEXT: [[TMP72:%.*]] = bitcast i32* [[TMP71]] to <4 x i32>* 1567; Os-NEXT: store <4 x i32> [[TMP70]], <4 x i32>* [[TMP72]], align 4 1568; Os-NEXT: [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 60 1569; Os-NEXT: [[TMP74:%.*]] = bitcast i32* [[TMP73]] to <4 x i32>* 1570; Os-NEXT: [[WIDE_LOAD_15:%.*]] = load <4 x i32>, <4 x i32>* [[TMP74]], align 4 1571; Os-NEXT: [[TMP75:%.*]] = add nsw <4 x i32> [[WIDE_LOAD_15]], [[BROADCAST_SPLAT]] 1572; Os-NEXT: [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 60 1573; Os-NEXT: [[TMP77:%.*]] = bitcast i32* [[TMP76]] to <4 x i32>* 1574; Os-NEXT: store <4 x i32> [[TMP75]], <4 x i32>* [[TMP77]], align 4 1575; Os-NEXT: [[TMP78:%.*]] = load i32, i32* [[A]], align 4 1576; Os-NEXT: ret i32 [[TMP78]] 1577; 1578; Oz-LABEL: @nopragma( 1579; Oz-NEXT: entry: 1580; Oz-NEXT: br label [[FOR_BODY:%.*]] 1581; Oz: for.body: 1582; Oz-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 1583; Oz-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]] 1584; Oz-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 1585; Oz-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]] 1586; Oz-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] 1587; Oz-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 1588; Oz-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1589; Oz-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64 1590; Oz-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] 1591; Oz: for.end: 1592; Oz-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4 1593; Oz-NEXT: ret i32 [[TMP1]] 1594; 1595; O1VEC2-LABEL: @nopragma( 1596; O1VEC2-NEXT: entry: 1597; O1VEC2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1598; O1VEC2: vector.ph: 1599; O1VEC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0 1600; O1VEC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer 1601; O1VEC2-NEXT: br label [[VECTOR_BODY:%.*]] 1602; O1VEC2: vector.body: 1603; O1VEC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1604; O1VEC2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 1605; O1VEC2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]] 1606; O1VEC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0 1607; O1VEC2-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* 1608; O1VEC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 1609; O1VEC2-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] 1610; O1VEC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]] 1611; O1VEC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0 1612; O1VEC2-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* 1613; O1VEC2-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP7]], align 4 1614; O1VEC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 1615; O1VEC2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 1616; O1VEC2-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 1617; O1VEC2: middle.block: 1618; O1VEC2-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, 64 1619; O1VEC2-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1620; O1VEC2: scalar.ph: 1621; O1VEC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 64, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1622; O1VEC2-NEXT: br label [[FOR_BODY:%.*]] 1623; O1VEC2: for.body: 1624; O1VEC2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 1625; O1VEC2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] 1626; O1VEC2-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 1627; O1VEC2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[N]] 1628; O1VEC2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] 1629; O1VEC2-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 1630; O1VEC2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1631; O1VEC2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64 1632; O1VEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] 1633; O1VEC2: for.end: 1634; O1VEC2-NEXT: [[TMP10:%.*]] = load i32, i32* [[A]], align 4 1635; O1VEC2-NEXT: ret i32 [[TMP10]] 1636; 1637; OzVEC2-LABEL: @nopragma( 1638; OzVEC2-NEXT: entry: 1639; OzVEC2-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 1640; OzVEC2: vector.ph: 1641; OzVEC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0 1642; OzVEC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer 1643; OzVEC2-NEXT: br label [[VECTOR_BODY:%.*]] 1644; OzVEC2: vector.body: 1645; OzVEC2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 1646; OzVEC2-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 1647; OzVEC2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]] 1648; OzVEC2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0 1649; OzVEC2-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* 1650; OzVEC2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 1651; OzVEC2-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] 1652; OzVEC2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]] 1653; OzVEC2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0 1654; OzVEC2-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* 1655; OzVEC2-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* [[TMP7]], align 4 1656; OzVEC2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 1657; OzVEC2-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64 1658; OzVEC2-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 1659; OzVEC2: middle.block: 1660; OzVEC2-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, 64 1661; OzVEC2-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 1662; OzVEC2: scalar.ph: 1663; OzVEC2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 64, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 1664; OzVEC2-NEXT: br label [[FOR_BODY:%.*]] 1665; OzVEC2: for.body: 1666; OzVEC2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 1667; OzVEC2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] 1668; OzVEC2-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 1669; OzVEC2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[N]] 1670; OzVEC2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] 1671; OzVEC2-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 1672; OzVEC2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1673; OzVEC2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64 1674; OzVEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] 1675; OzVEC2: for.end: 1676; OzVEC2-NEXT: [[TMP10:%.*]] = load i32, i32* [[A]], align 4 1677; OzVEC2-NEXT: ret i32 [[TMP10]] 1678; 1679; O3DIS-LABEL: @nopragma( 1680; O3DIS-NEXT: entry: 1681; O3DIS-NEXT: br label [[FOR_BODY:%.*]] 1682; O3DIS: for.body: 1683; O3DIS-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 1684; O3DIS-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]] 1685; O3DIS-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 1686; O3DIS-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]] 1687; O3DIS-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] 1688; O3DIS-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 1689; O3DIS-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1690; O3DIS-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 64 1691; O3DIS-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]] 1692; O3DIS: for.end: 1693; O3DIS-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4 1694; O3DIS-NEXT: ret i32 [[TMP1]] 1695; 1696entry: 1697 br label %for.body 1698 1699for.body: ; preds = %for.body, %entry 1700 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] 1701 %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv 1702 %0 = load i32, i32* %arrayidx, align 4 1703 %add = add nsw i32 %0, %N 1704 %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv 1705 store i32 %add, i32* %arrayidx2, align 4 1706 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 1707 %exitcond = icmp eq i64 %indvars.iv.next, 64 1708 br i1 %exitcond, label %for.end, label %for.body 1709 1710for.end: ; preds = %for.body 1711 %1 = load i32, i32* %a, align 4 1712 ret i32 %1 1713} 1714 1715define i32 @disabled(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %N) { 1716; O1-LABEL: @disabled( 1717; O1-NEXT: entry: 1718; O1-NEXT: br label [[FOR_BODY:%.*]] 1719; O1: for.body: 1720; O1-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 1721; O1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]] 1722; O1-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 1723; O1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]] 1724; O1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] 1725; O1-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 1726; O1-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1727; O1-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48 1728; O1-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 1729; O1: for.end: 1730; O1-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4 1731; O1-NEXT: ret i32 [[TMP1]] 1732; 1733; O2-LABEL: @disabled( 1734; O2-NEXT: entry: 1735; O2-NEXT: br label [[FOR_BODY:%.*]] 1736; O2: for.body: 1737; O2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 1738; O2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]] 1739; O2-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 1740; O2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]] 1741; O2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] 1742; O2-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 1743; O2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1744; O2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48 1745; O2-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 1746; O2: for.end: 1747; O2-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4 1748; O2-NEXT: ret i32 [[TMP1]] 1749; 1750; O3-LABEL: @disabled( 1751; O3-NEXT: entry: 1752; O3-NEXT: br label [[FOR_BODY:%.*]] 1753; O3: for.body: 1754; O3-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 1755; O3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]] 1756; O3-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 1757; O3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]] 1758; O3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] 1759; O3-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 1760; O3-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1761; O3-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48 1762; O3-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 1763; O3: for.end: 1764; O3-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4 1765; O3-NEXT: ret i32 [[TMP1]] 1766; 1767; O3DEFAULT-LABEL: @disabled( 1768; O3DEFAULT-NEXT: entry: 1769; O3DEFAULT-NEXT: [[TMP0:%.*]] = bitcast i32* [[B:%.*]] to <4 x i32>* 1770; O3DEFAULT-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4 1771; O3DEFAULT-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i64 0 1772; O3DEFAULT-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> zeroinitializer 1773; O3DEFAULT-NEXT: [[TMP3:%.*]] = add nsw <4 x i32> [[TMP1]], [[SHUFFLE]] 1774; O3DEFAULT-NEXT: [[TMP4:%.*]] = bitcast i32* [[A:%.*]] to <4 x i32>* 1775; O3DEFAULT-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4 1776; O3DEFAULT-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 4 1777; O3DEFAULT-NEXT: [[ARRAYIDX2_4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4 1778; O3DEFAULT-NEXT: [[TMP5:%.*]] = bitcast i32* [[ARRAYIDX_4]] to <4 x i32>* 1779; O3DEFAULT-NEXT: [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP5]], align 4 1780; O3DEFAULT-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[TMP6]], [[SHUFFLE]] 1781; O3DEFAULT-NEXT: [[TMP8:%.*]] = bitcast i32* [[ARRAYIDX2_4]] to <4 x i32>* 1782; O3DEFAULT-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP8]], align 4 1783; O3DEFAULT-NEXT: [[ARRAYIDX_8:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 8 1784; O3DEFAULT-NEXT: [[ARRAYIDX2_8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 8 1785; O3DEFAULT-NEXT: [[TMP9:%.*]] = bitcast i32* [[ARRAYIDX_8]] to <4 x i32>* 1786; O3DEFAULT-NEXT: [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4 1787; O3DEFAULT-NEXT: [[TMP11:%.*]] = add nsw <4 x i32> [[TMP10]], [[SHUFFLE]] 1788; O3DEFAULT-NEXT: [[TMP12:%.*]] = bitcast i32* [[ARRAYIDX2_8]] to <4 x i32>* 1789; O3DEFAULT-NEXT: store <4 x i32> [[TMP11]], <4 x i32>* [[TMP12]], align 4 1790; O3DEFAULT-NEXT: [[ARRAYIDX_12:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 12 1791; O3DEFAULT-NEXT: [[ARRAYIDX2_12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 12 1792; O3DEFAULT-NEXT: [[TMP13:%.*]] = bitcast i32* [[ARRAYIDX_12]] to <4 x i32>* 1793; O3DEFAULT-NEXT: [[TMP14:%.*]] = load <4 x i32>, <4 x i32>* [[TMP13]], align 4 1794; O3DEFAULT-NEXT: [[TMP15:%.*]] = add nsw <4 x i32> [[TMP14]], [[SHUFFLE]] 1795; O3DEFAULT-NEXT: [[TMP16:%.*]] = bitcast i32* [[ARRAYIDX2_12]] to <4 x i32>* 1796; O3DEFAULT-NEXT: store <4 x i32> [[TMP15]], <4 x i32>* [[TMP16]], align 4 1797; O3DEFAULT-NEXT: [[ARRAYIDX_16:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 16 1798; O3DEFAULT-NEXT: [[ARRAYIDX2_16:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 16 1799; O3DEFAULT-NEXT: [[TMP17:%.*]] = bitcast i32* [[ARRAYIDX_16]] to <4 x i32>* 1800; O3DEFAULT-NEXT: [[TMP18:%.*]] = load <4 x i32>, <4 x i32>* [[TMP17]], align 4 1801; O3DEFAULT-NEXT: [[TMP19:%.*]] = add nsw <4 x i32> [[TMP18]], [[SHUFFLE]] 1802; O3DEFAULT-NEXT: [[TMP20:%.*]] = bitcast i32* [[ARRAYIDX2_16]] to <4 x i32>* 1803; O3DEFAULT-NEXT: store <4 x i32> [[TMP19]], <4 x i32>* [[TMP20]], align 4 1804; O3DEFAULT-NEXT: [[ARRAYIDX_20:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 20 1805; O3DEFAULT-NEXT: [[ARRAYIDX2_20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 20 1806; O3DEFAULT-NEXT: [[TMP21:%.*]] = bitcast i32* [[ARRAYIDX_20]] to <4 x i32>* 1807; O3DEFAULT-NEXT: [[TMP22:%.*]] = load <4 x i32>, <4 x i32>* [[TMP21]], align 4 1808; O3DEFAULT-NEXT: [[TMP23:%.*]] = add nsw <4 x i32> [[TMP22]], [[SHUFFLE]] 1809; O3DEFAULT-NEXT: [[TMP24:%.*]] = bitcast i32* [[ARRAYIDX2_20]] to <4 x i32>* 1810; O3DEFAULT-NEXT: store <4 x i32> [[TMP23]], <4 x i32>* [[TMP24]], align 4 1811; O3DEFAULT-NEXT: [[ARRAYIDX_24:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 24 1812; O3DEFAULT-NEXT: [[ARRAYIDX2_24:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 24 1813; O3DEFAULT-NEXT: [[TMP25:%.*]] = bitcast i32* [[ARRAYIDX_24]] to <4 x i32>* 1814; O3DEFAULT-NEXT: [[TMP26:%.*]] = load <4 x i32>, <4 x i32>* [[TMP25]], align 4 1815; O3DEFAULT-NEXT: [[TMP27:%.*]] = add nsw <4 x i32> [[TMP26]], [[SHUFFLE]] 1816; O3DEFAULT-NEXT: [[TMP28:%.*]] = bitcast i32* [[ARRAYIDX2_24]] to <4 x i32>* 1817; O3DEFAULT-NEXT: store <4 x i32> [[TMP27]], <4 x i32>* [[TMP28]], align 4 1818; O3DEFAULT-NEXT: [[ARRAYIDX_28:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 28 1819; O3DEFAULT-NEXT: [[ARRAYIDX2_28:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 28 1820; O3DEFAULT-NEXT: [[TMP29:%.*]] = bitcast i32* [[ARRAYIDX_28]] to <4 x i32>* 1821; O3DEFAULT-NEXT: [[TMP30:%.*]] = load <4 x i32>, <4 x i32>* [[TMP29]], align 4 1822; O3DEFAULT-NEXT: [[TMP31:%.*]] = add nsw <4 x i32> [[TMP30]], [[SHUFFLE]] 1823; O3DEFAULT-NEXT: [[TMP32:%.*]] = bitcast i32* [[ARRAYIDX2_28]] to <4 x i32>* 1824; O3DEFAULT-NEXT: store <4 x i32> [[TMP31]], <4 x i32>* [[TMP32]], align 4 1825; O3DEFAULT-NEXT: [[ARRAYIDX_32:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 32 1826; O3DEFAULT-NEXT: [[ARRAYIDX2_32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 32 1827; O3DEFAULT-NEXT: [[TMP33:%.*]] = bitcast i32* [[ARRAYIDX_32]] to <4 x i32>* 1828; O3DEFAULT-NEXT: [[TMP34:%.*]] = load <4 x i32>, <4 x i32>* [[TMP33]], align 4 1829; O3DEFAULT-NEXT: [[TMP35:%.*]] = add nsw <4 x i32> [[TMP34]], [[SHUFFLE]] 1830; O3DEFAULT-NEXT: [[TMP36:%.*]] = bitcast i32* [[ARRAYIDX2_32]] to <4 x i32>* 1831; O3DEFAULT-NEXT: store <4 x i32> [[TMP35]], <4 x i32>* [[TMP36]], align 4 1832; O3DEFAULT-NEXT: [[ARRAYIDX_36:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 36 1833; O3DEFAULT-NEXT: [[ARRAYIDX2_36:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 36 1834; O3DEFAULT-NEXT: [[TMP37:%.*]] = bitcast i32* [[ARRAYIDX_36]] to <4 x i32>* 1835; O3DEFAULT-NEXT: [[TMP38:%.*]] = load <4 x i32>, <4 x i32>* [[TMP37]], align 4 1836; O3DEFAULT-NEXT: [[TMP39:%.*]] = add nsw <4 x i32> [[TMP38]], [[SHUFFLE]] 1837; O3DEFAULT-NEXT: [[TMP40:%.*]] = bitcast i32* [[ARRAYIDX2_36]] to <4 x i32>* 1838; O3DEFAULT-NEXT: store <4 x i32> [[TMP39]], <4 x i32>* [[TMP40]], align 4 1839; O3DEFAULT-NEXT: [[ARRAYIDX_40:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 40 1840; O3DEFAULT-NEXT: [[ARRAYIDX2_40:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 40 1841; O3DEFAULT-NEXT: [[TMP41:%.*]] = bitcast i32* [[ARRAYIDX_40]] to <4 x i32>* 1842; O3DEFAULT-NEXT: [[TMP42:%.*]] = load <4 x i32>, <4 x i32>* [[TMP41]], align 4 1843; O3DEFAULT-NEXT: [[TMP43:%.*]] = add nsw <4 x i32> [[TMP42]], [[SHUFFLE]] 1844; O3DEFAULT-NEXT: [[TMP44:%.*]] = bitcast i32* [[ARRAYIDX2_40]] to <4 x i32>* 1845; O3DEFAULT-NEXT: store <4 x i32> [[TMP43]], <4 x i32>* [[TMP44]], align 4 1846; O3DEFAULT-NEXT: [[ARRAYIDX_44:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 44 1847; O3DEFAULT-NEXT: [[ARRAYIDX2_44:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 44 1848; O3DEFAULT-NEXT: [[TMP45:%.*]] = bitcast i32* [[ARRAYIDX_44]] to <4 x i32>* 1849; O3DEFAULT-NEXT: [[TMP46:%.*]] = load <4 x i32>, <4 x i32>* [[TMP45]], align 4 1850; O3DEFAULT-NEXT: [[TMP47:%.*]] = add nsw <4 x i32> [[TMP46]], [[SHUFFLE]] 1851; O3DEFAULT-NEXT: [[TMP48:%.*]] = bitcast i32* [[ARRAYIDX2_44]] to <4 x i32>* 1852; O3DEFAULT-NEXT: store <4 x i32> [[TMP47]], <4 x i32>* [[TMP48]], align 4 1853; O3DEFAULT-NEXT: [[TMP49:%.*]] = load i32, i32* [[A]], align 4 1854; O3DEFAULT-NEXT: ret i32 [[TMP49]] 1855; 1856; Os-LABEL: @disabled( 1857; Os-NEXT: entry: 1858; Os-NEXT: br label [[FOR_BODY:%.*]] 1859; Os: for.body: 1860; Os-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 1861; Os-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]] 1862; Os-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 1863; Os-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]] 1864; Os-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] 1865; Os-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 1866; Os-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1867; Os-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48 1868; Os-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 1869; Os: for.end: 1870; Os-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4 1871; Os-NEXT: ret i32 [[TMP1]] 1872; 1873; Oz-LABEL: @disabled( 1874; Oz-NEXT: entry: 1875; Oz-NEXT: br label [[FOR_BODY:%.*]] 1876; Oz: for.body: 1877; Oz-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 1878; Oz-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]] 1879; Oz-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 1880; Oz-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]] 1881; Oz-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] 1882; Oz-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 1883; Oz-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1884; Oz-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48 1885; Oz-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 1886; Oz: for.end: 1887; Oz-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4 1888; Oz-NEXT: ret i32 [[TMP1]] 1889; 1890; O1VEC2-LABEL: @disabled( 1891; O1VEC2-NEXT: entry: 1892; O1VEC2-NEXT: br label [[FOR_BODY:%.*]] 1893; O1VEC2: for.body: 1894; O1VEC2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 1895; O1VEC2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]] 1896; O1VEC2-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 1897; O1VEC2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]] 1898; O1VEC2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] 1899; O1VEC2-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 1900; O1VEC2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1901; O1VEC2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48 1902; O1VEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 1903; O1VEC2: for.end: 1904; O1VEC2-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4 1905; O1VEC2-NEXT: ret i32 [[TMP1]] 1906; 1907; OzVEC2-LABEL: @disabled( 1908; OzVEC2-NEXT: entry: 1909; OzVEC2-NEXT: br label [[FOR_BODY:%.*]] 1910; OzVEC2: for.body: 1911; OzVEC2-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 1912; OzVEC2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]] 1913; OzVEC2-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 1914; OzVEC2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]] 1915; OzVEC2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] 1916; OzVEC2-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 1917; OzVEC2-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1918; OzVEC2-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48 1919; OzVEC2-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 1920; OzVEC2: for.end: 1921; OzVEC2-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4 1922; OzVEC2-NEXT: ret i32 [[TMP1]] 1923; 1924; O3DIS-LABEL: @disabled( 1925; O3DIS-NEXT: entry: 1926; O3DIS-NEXT: br label [[FOR_BODY:%.*]] 1927; O3DIS: for.body: 1928; O3DIS-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] 1929; O3DIS-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV]] 1930; O3DIS-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 1931; O3DIS-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP0]], [[N:%.*]] 1932; O3DIS-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] 1933; O3DIS-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4 1934; O3DIS-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 1935; O3DIS-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 48 1936; O3DIS-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 1937; O3DIS: for.end: 1938; O3DIS-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4 1939; O3DIS-NEXT: ret i32 [[TMP1]] 1940; 1941entry: 1942 br label %for.body 1943 1944for.body: ; preds = %for.body, %entry 1945 %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] 1946 %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv 1947 %0 = load i32, i32* %arrayidx, align 4 1948 %add = add nsw i32 %0, %N 1949 %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv 1950 store i32 %add, i32* %arrayidx2, align 4 1951 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 1952 %exitcond = icmp eq i64 %indvars.iv.next, 48 1953 br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2 1954 1955for.end: ; preds = %for.body 1956 %1 = load i32, i32* %a, align 4 1957 ret i32 %1 1958} 1959 1960!0 = !{!0, !1} 1961!1 = !{!"llvm.loop.vectorize.enable", i1 1} 1962!2 = !{!2, !3} 1963!3 = !{!"llvm.loop.vectorize.enable", i1 0} 1964