1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; RUN: opt -S -slp-vectorizer -instcombine -mtriple=aarch64--linux-gnu < %s | FileCheck %s 3 4target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" 5target triple = "aarch64" 6 7define i16 @reduce_allstrided(i16* nocapture noundef readonly %x, i16* nocapture noundef readonly %y, i32 noundef %stride) { 8; CHECK-LABEL: @reduce_allstrided( 9; CHECK-NEXT: entry: 10; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[X:%.*]], align 2 11; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[STRIDE:%.*]] to i64 12; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM]] 13; CHECK-NEXT: [[TMP1:%.*]] = load i16, i16* [[ARRAYIDX1]], align 2 14; CHECK-NEXT: [[MUL2:%.*]] = shl nsw i32 [[STRIDE]], 1 15; CHECK-NEXT: [[IDXPROM3:%.*]] = sext i32 [[MUL2]] to i64 16; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM3]] 17; CHECK-NEXT: [[TMP2:%.*]] = load i16, i16* [[ARRAYIDX4]], align 2 18; CHECK-NEXT: [[MUL5:%.*]] = mul nsw i32 [[STRIDE]], 3 19; CHECK-NEXT: [[IDXPROM6:%.*]] = sext i32 [[MUL5]] to i64 20; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM6]] 21; CHECK-NEXT: [[TMP3:%.*]] = load i16, i16* [[ARRAYIDX7]], align 2 22; CHECK-NEXT: [[MUL8:%.*]] = shl nsw i32 [[STRIDE]], 2 23; CHECK-NEXT: [[IDXPROM9:%.*]] = sext i32 [[MUL8]] to i64 24; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM9]] 25; CHECK-NEXT: [[TMP4:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2 26; CHECK-NEXT: [[MUL11:%.*]] = mul nsw i32 [[STRIDE]], 5 27; CHECK-NEXT: [[IDXPROM12:%.*]] = sext i32 [[MUL11]] to i64 28; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM12]] 29; CHECK-NEXT: [[TMP5:%.*]] = load i16, i16* [[ARRAYIDX13]], align 2 30; CHECK-NEXT: [[MUL14:%.*]] = mul nsw i32 [[STRIDE]], 6 31; CHECK-NEXT: [[IDXPROM15:%.*]] = sext i32 [[MUL14]] to i64 32; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM15]] 33; CHECK-NEXT: [[TMP6:%.*]] = load i16, i16* [[ARRAYIDX16]], align 2 34; CHECK-NEXT: [[MUL17:%.*]] = mul nsw i32 [[STRIDE]], 7 35; CHECK-NEXT: [[IDXPROM18:%.*]] = sext i32 [[MUL17]] to i64 36; CHECK-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM18]] 37; CHECK-NEXT: [[TMP7:%.*]] = load i16, i16* [[ARRAYIDX19]], align 2 38; CHECK-NEXT: [[TMP8:%.*]] = load i16, i16* [[Y:%.*]], align 2 39; CHECK-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM]] 40; CHECK-NEXT: [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX23]], align 2 41; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM3]] 42; CHECK-NEXT: [[TMP10:%.*]] = load i16, i16* [[ARRAYIDX26]], align 2 43; CHECK-NEXT: [[ARRAYIDX29:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM6]] 44; CHECK-NEXT: [[TMP11:%.*]] = load i16, i16* [[ARRAYIDX29]], align 2 45; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM9]] 46; CHECK-NEXT: [[TMP12:%.*]] = load i16, i16* [[ARRAYIDX32]], align 2 47; CHECK-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM12]] 48; CHECK-NEXT: [[TMP13:%.*]] = load i16, i16* [[ARRAYIDX35]], align 2 49; CHECK-NEXT: [[ARRAYIDX38:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM15]] 50; CHECK-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX38]], align 2 51; CHECK-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM18]] 52; CHECK-NEXT: [[TMP15:%.*]] = load i16, i16* [[ARRAYIDX41]], align 2 53; CHECK-NEXT: [[MUL43:%.*]] = mul i16 [[TMP8]], [[TMP0]] 54; CHECK-NEXT: [[MUL48:%.*]] = mul i16 [[TMP9]], [[TMP1]] 55; CHECK-NEXT: [[ADD49:%.*]] = add i16 [[MUL48]], [[MUL43]] 56; CHECK-NEXT: [[MUL54:%.*]] = mul i16 [[TMP10]], [[TMP2]] 57; CHECK-NEXT: [[ADD55:%.*]] = add i16 [[ADD49]], [[MUL54]] 58; CHECK-NEXT: [[MUL60:%.*]] = mul i16 [[TMP11]], [[TMP3]] 59; CHECK-NEXT: [[ADD61:%.*]] = add i16 [[ADD55]], [[MUL60]] 60; CHECK-NEXT: [[MUL66:%.*]] = mul i16 [[TMP12]], [[TMP4]] 61; CHECK-NEXT: [[ADD67:%.*]] = add i16 [[ADD61]], [[MUL66]] 62; CHECK-NEXT: [[MUL72:%.*]] = mul i16 [[TMP13]], [[TMP5]] 63; CHECK-NEXT: [[ADD73:%.*]] = add i16 [[ADD67]], [[MUL72]] 64; CHECK-NEXT: [[MUL78:%.*]] = mul i16 [[TMP14]], [[TMP6]] 65; CHECK-NEXT: [[ADD79:%.*]] = add i16 [[ADD73]], [[MUL78]] 66; CHECK-NEXT: [[MUL84:%.*]] = mul i16 [[TMP15]], [[TMP7]] 67; CHECK-NEXT: [[ADD85:%.*]] = add i16 [[ADD79]], [[MUL84]] 68; CHECK-NEXT: ret i16 [[ADD85]] 69; 70entry: 71 %0 = load i16, i16* %x, align 2 72 %idxprom = sext i32 %stride to i64 73 %arrayidx1 = getelementptr inbounds i16, i16* %x, i64 %idxprom 74 %1 = load i16, i16* %arrayidx1, align 2 75 %mul2 = shl nsw i32 %stride, 1 76 %idxprom3 = sext i32 %mul2 to i64 77 %arrayidx4 = getelementptr inbounds i16, i16* %x, i64 %idxprom3 78 %2 = load i16, i16* %arrayidx4, align 2 79 %mul5 = mul nsw i32 %stride, 3 80 %idxprom6 = sext i32 %mul5 to i64 81 %arrayidx7 = getelementptr inbounds i16, i16* %x, i64 %idxprom6 82 %3 = load i16, i16* %arrayidx7, align 2 83 %mul8 = shl nsw i32 %stride, 2 84 %idxprom9 = sext i32 %mul8 to i64 85 %arrayidx10 = getelementptr inbounds i16, i16* %x, i64 %idxprom9 86 %4 = load i16, i16* %arrayidx10, align 2 87 %mul11 = mul nsw i32 %stride, 5 88 %idxprom12 = sext i32 %mul11 to i64 89 %arrayidx13 = getelementptr inbounds i16, i16* %x, i64 %idxprom12 90 %5 = load i16, i16* %arrayidx13, align 2 91 %mul14 = mul nsw i32 %stride, 6 92 %idxprom15 = sext i32 %mul14 to i64 93 %arrayidx16 = getelementptr inbounds i16, i16* %x, i64 %idxprom15 94 %6 = load i16, i16* %arrayidx16, align 2 95 %mul17 = mul nsw i32 %stride, 7 96 %idxprom18 = sext i32 %mul17 to i64 97 %arrayidx19 = getelementptr inbounds i16, i16* %x, i64 %idxprom18 98 %7 = load i16, i16* %arrayidx19, align 2 99 %8 = load i16, i16* %y, align 2 100 %arrayidx23 = getelementptr inbounds i16, i16* %y, i64 %idxprom 101 %9 = load i16, i16* %arrayidx23, align 2 102 %arrayidx26 = getelementptr inbounds i16, i16* %y, i64 %idxprom3 103 %10 = load i16, i16* %arrayidx26, align 2 104 %arrayidx29 = getelementptr inbounds i16, i16* %y, i64 %idxprom6 105 %11 = load i16, i16* %arrayidx29, align 2 106 %arrayidx32 = getelementptr inbounds i16, i16* %y, i64 %idxprom9 107 %12 = load i16, i16* %arrayidx32, align 2 108 %arrayidx35 = getelementptr inbounds i16, i16* %y, i64 %idxprom12 109 %13 = load i16, i16* %arrayidx35, align 2 110 %arrayidx38 = getelementptr inbounds i16, i16* %y, i64 %idxprom15 111 %14 = load i16, i16* %arrayidx38, align 2 112 %arrayidx41 = getelementptr inbounds i16, i16* %y, i64 %idxprom18 113 %15 = load i16, i16* %arrayidx41, align 2 114 %mul43 = mul i16 %8, %0 115 %mul48 = mul i16 %9, %1 116 %add49 = add i16 %mul48, %mul43 117 %mul54 = mul i16 %10, %2 118 %add55 = add i16 %add49, %mul54 119 %mul60 = mul i16 %11, %3 120 %add61 = add i16 %add55, %mul60 121 %mul66 = mul i16 %12, %4 122 %add67 = add i16 %add61, %mul66 123 %mul72 = mul i16 %13, %5 124 %add73 = add i16 %add67, %mul72 125 %mul78 = mul i16 %14, %6 126 %add79 = add i16 %add73, %mul78 127 %mul84 = mul i16 %15, %7 128 %add85 = add i16 %add79, %mul84 129 ret i16 %add85 130} 131 132define i16 @reduce_blockstrided2(i16* nocapture noundef readonly %x, i16* nocapture noundef readonly %y, i32 noundef %stride) { 133; CHECK-LABEL: @reduce_blockstrided2( 134; CHECK-NEXT: entry: 135; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[X:%.*]], align 2 136; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 1 137; CHECK-NEXT: [[TMP1:%.*]] = load i16, i16* [[ARRAYIDX1]], align 2 138; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[STRIDE:%.*]] to i64 139; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM]] 140; CHECK-NEXT: [[TMP2:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2 141; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[STRIDE]], 1 142; CHECK-NEXT: [[IDXPROM4:%.*]] = sext i32 [[ADD3]] to i64 143; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM4]] 144; CHECK-NEXT: [[TMP3:%.*]] = load i16, i16* [[ARRAYIDX5]], align 2 145; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[STRIDE]], 1 146; CHECK-NEXT: [[IDXPROM7:%.*]] = sext i32 [[MUL]] to i64 147; CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM7]] 148; CHECK-NEXT: [[TMP4:%.*]] = load i16, i16* [[ARRAYIDX8]], align 2 149; CHECK-NEXT: [[ADD10:%.*]] = or i32 [[MUL]], 1 150; CHECK-NEXT: [[IDXPROM11:%.*]] = sext i32 [[ADD10]] to i64 151; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM11]] 152; CHECK-NEXT: [[TMP5:%.*]] = load i16, i16* [[ARRAYIDX12]], align 2 153; CHECK-NEXT: [[MUL13:%.*]] = mul nsw i32 [[STRIDE]], 3 154; CHECK-NEXT: [[IDXPROM15:%.*]] = sext i32 [[MUL13]] to i64 155; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM15]] 156; CHECK-NEXT: [[TMP6:%.*]] = load i16, i16* [[ARRAYIDX16]], align 2 157; CHECK-NEXT: [[ADD18:%.*]] = add nsw i32 [[MUL13]], 1 158; CHECK-NEXT: [[IDXPROM19:%.*]] = sext i32 [[ADD18]] to i64 159; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM19]] 160; CHECK-NEXT: [[TMP7:%.*]] = load i16, i16* [[ARRAYIDX20]], align 2 161; CHECK-NEXT: [[TMP8:%.*]] = load i16, i16* [[Y:%.*]], align 2 162; CHECK-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM]] 163; CHECK-NEXT: [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX24]], align 2 164; CHECK-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM7]] 165; CHECK-NEXT: [[TMP10:%.*]] = load i16, i16* [[ARRAYIDX28]], align 2 166; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM15]] 167; CHECK-NEXT: [[TMP11:%.*]] = load i16, i16* [[ARRAYIDX32]], align 2 168; CHECK-NEXT: [[ARRAYIDX33:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 1 169; CHECK-NEXT: [[TMP12:%.*]] = load i16, i16* [[ARRAYIDX33]], align 2 170; CHECK-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM4]] 171; CHECK-NEXT: [[TMP13:%.*]] = load i16, i16* [[ARRAYIDX36]], align 2 172; CHECK-NEXT: [[ARRAYIDX40:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM11]] 173; CHECK-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX40]], align 2 174; CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM19]] 175; CHECK-NEXT: [[TMP15:%.*]] = load i16, i16* [[ARRAYIDX44]], align 2 176; CHECK-NEXT: [[MUL46:%.*]] = mul i16 [[TMP8]], [[TMP0]] 177; CHECK-NEXT: [[MUL52:%.*]] = mul i16 [[TMP12]], [[TMP1]] 178; CHECK-NEXT: [[MUL58:%.*]] = mul i16 [[TMP9]], [[TMP2]] 179; CHECK-NEXT: [[MUL64:%.*]] = mul i16 [[TMP13]], [[TMP3]] 180; CHECK-NEXT: [[MUL70:%.*]] = mul i16 [[TMP10]], [[TMP4]] 181; CHECK-NEXT: [[MUL76:%.*]] = mul i16 [[TMP14]], [[TMP5]] 182; CHECK-NEXT: [[MUL82:%.*]] = mul i16 [[TMP11]], [[TMP6]] 183; CHECK-NEXT: [[MUL88:%.*]] = mul i16 [[TMP15]], [[TMP7]] 184; CHECK-NEXT: [[ADD53:%.*]] = add i16 [[MUL58]], [[MUL46]] 185; CHECK-NEXT: [[ADD59:%.*]] = add i16 [[ADD53]], [[MUL70]] 186; CHECK-NEXT: [[ADD65:%.*]] = add i16 [[ADD59]], [[MUL82]] 187; CHECK-NEXT: [[ADD71:%.*]] = add i16 [[ADD65]], [[MUL52]] 188; CHECK-NEXT: [[ADD77:%.*]] = add i16 [[ADD71]], [[MUL64]] 189; CHECK-NEXT: [[ADD83:%.*]] = add i16 [[ADD77]], [[MUL76]] 190; CHECK-NEXT: [[ADD89:%.*]] = add i16 [[ADD83]], [[MUL88]] 191; CHECK-NEXT: ret i16 [[ADD89]] 192; 193entry: 194 %0 = load i16, i16* %x, align 2 195 %arrayidx1 = getelementptr inbounds i16, i16* %x, i64 1 196 %1 = load i16, i16* %arrayidx1, align 2 197 %idxprom = sext i32 %stride to i64 198 %arrayidx2 = getelementptr inbounds i16, i16* %x, i64 %idxprom 199 %2 = load i16, i16* %arrayidx2, align 2 200 %add3 = add nsw i32 %stride, 1 201 %idxprom4 = sext i32 %add3 to i64 202 %arrayidx5 = getelementptr inbounds i16, i16* %x, i64 %idxprom4 203 %3 = load i16, i16* %arrayidx5, align 2 204 %mul = shl nsw i32 %stride, 1 205 %idxprom7 = sext i32 %mul to i64 206 %arrayidx8 = getelementptr inbounds i16, i16* %x, i64 %idxprom7 207 %4 = load i16, i16* %arrayidx8, align 2 208 %add10 = or i32 %mul, 1 209 %idxprom11 = sext i32 %add10 to i64 210 %arrayidx12 = getelementptr inbounds i16, i16* %x, i64 %idxprom11 211 %5 = load i16, i16* %arrayidx12, align 2 212 %mul13 = mul nsw i32 %stride, 3 213 %idxprom15 = sext i32 %mul13 to i64 214 %arrayidx16 = getelementptr inbounds i16, i16* %x, i64 %idxprom15 215 %6 = load i16, i16* %arrayidx16, align 2 216 %add18 = add nsw i32 %mul13, 1 217 %idxprom19 = sext i32 %add18 to i64 218 %arrayidx20 = getelementptr inbounds i16, i16* %x, i64 %idxprom19 219 %7 = load i16, i16* %arrayidx20, align 2 220 %8 = load i16, i16* %y, align 2 221 %arrayidx24 = getelementptr inbounds i16, i16* %y, i64 %idxprom 222 %9 = load i16, i16* %arrayidx24, align 2 223 %arrayidx28 = getelementptr inbounds i16, i16* %y, i64 %idxprom7 224 %10 = load i16, i16* %arrayidx28, align 2 225 %arrayidx32 = getelementptr inbounds i16, i16* %y, i64 %idxprom15 226 %11 = load i16, i16* %arrayidx32, align 2 227 %arrayidx33 = getelementptr inbounds i16, i16* %y, i64 1 228 %12 = load i16, i16* %arrayidx33, align 2 229 %arrayidx36 = getelementptr inbounds i16, i16* %y, i64 %idxprom4 230 %13 = load i16, i16* %arrayidx36, align 2 231 %arrayidx40 = getelementptr inbounds i16, i16* %y, i64 %idxprom11 232 %14 = load i16, i16* %arrayidx40, align 2 233 %arrayidx44 = getelementptr inbounds i16, i16* %y, i64 %idxprom19 234 %15 = load i16, i16* %arrayidx44, align 2 235 %mul46 = mul i16 %8, %0 236 %mul52 = mul i16 %12, %1 237 %mul58 = mul i16 %9, %2 238 %mul64 = mul i16 %13, %3 239 %mul70 = mul i16 %10, %4 240 %mul76 = mul i16 %14, %5 241 %mul82 = mul i16 %11, %6 242 %mul88 = mul i16 %15, %7 243 %add53 = add i16 %mul58, %mul46 244 %add59 = add i16 %add53, %mul70 245 %add65 = add i16 %add59, %mul82 246 %add71 = add i16 %add65, %mul52 247 %add77 = add i16 %add71, %mul64 248 %add83 = add i16 %add77, %mul76 249 %add89 = add i16 %add83, %mul88 250 ret i16 %add89 251} 252 253define i16 @reduce_blockstrided3(i16* nocapture noundef readonly %x, i16* nocapture noundef readonly %y, i32 noundef %stride) { 254; CHECK-LABEL: @reduce_blockstrided3( 255; CHECK-NEXT: entry: 256; CHECK-NEXT: [[L0:%.*]] = load i16, i16* [[X:%.*]], align 2 257; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 1 258; CHECK-NEXT: [[L1:%.*]] = load i16, i16* [[ARRAYIDX1]], align 2 259; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 2 260; CHECK-NEXT: [[L2:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2 261; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[STRIDE:%.*]] to i64 262; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM]] 263; CHECK-NEXT: [[L4:%.*]] = load i16, i16* [[ARRAYIDX4]], align 2 264; CHECK-NEXT: [[ADD5:%.*]] = add nsw i32 [[STRIDE]], 1 265; CHECK-NEXT: [[IDXPROM6:%.*]] = sext i32 [[ADD5]] to i64 266; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM6]] 267; CHECK-NEXT: [[L5:%.*]] = load i16, i16* [[ARRAYIDX7]], align 2 268; CHECK-NEXT: [[ADD8:%.*]] = add nsw i32 [[STRIDE]], 2 269; CHECK-NEXT: [[IDXPROM9:%.*]] = sext i32 [[ADD8]] to i64 270; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM9]] 271; CHECK-NEXT: [[L6:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2 272; CHECK-NEXT: [[L8:%.*]] = load i16, i16* [[Y:%.*]], align 2 273; CHECK-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 1 274; CHECK-NEXT: [[L9:%.*]] = load i16, i16* [[ARRAYIDX15]], align 2 275; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 2 276; CHECK-NEXT: [[L10:%.*]] = load i16, i16* [[ARRAYIDX16]], align 2 277; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM]] 278; CHECK-NEXT: [[L12:%.*]] = load i16, i16* [[ARRAYIDX20]], align 2 279; CHECK-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM6]] 280; CHECK-NEXT: [[L13:%.*]] = load i16, i16* [[ARRAYIDX23]], align 2 281; CHECK-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM9]] 282; CHECK-NEXT: [[L14:%.*]] = load i16, i16* [[ARRAYIDX26]], align 2 283; CHECK-NEXT: [[MUL:%.*]] = mul i16 [[L8]], [[L0]] 284; CHECK-NEXT: [[MUL36:%.*]] = mul i16 [[L9]], [[L1]] 285; CHECK-NEXT: [[ADD37:%.*]] = add i16 [[MUL36]], [[MUL]] 286; CHECK-NEXT: [[MUL48:%.*]] = mul i16 [[L10]], [[L2]] 287; CHECK-NEXT: [[ADD49:%.*]] = add i16 [[ADD37]], [[MUL48]] 288; CHECK-NEXT: [[MUL54:%.*]] = mul i16 [[L13]], [[L5]] 289; CHECK-NEXT: [[ADD55:%.*]] = add i16 [[ADD49]], [[MUL54]] 290; CHECK-NEXT: [[MUL60:%.*]] = mul i16 [[L12]], [[L4]] 291; CHECK-NEXT: [[ADD61:%.*]] = add i16 [[ADD55]], [[MUL60]] 292; CHECK-NEXT: [[MUL72:%.*]] = mul i16 [[L14]], [[L6]] 293; CHECK-NEXT: [[ADD73:%.*]] = add i16 [[ADD61]], [[MUL72]] 294; CHECK-NEXT: ret i16 [[ADD73]] 295; 296entry: 297 %l0 = load i16, i16* %x, align 2 298 %arrayidx1 = getelementptr inbounds i16, i16* %x, i64 1 299 %l1 = load i16, i16* %arrayidx1, align 2 300 %arrayidx2 = getelementptr inbounds i16, i16* %x, i64 2 301 %l2 = load i16, i16* %arrayidx2, align 2 302 %idxprom = sext i32 %stride to i64 303 %arrayidx4 = getelementptr inbounds i16, i16* %x, i64 %idxprom 304 %l4 = load i16, i16* %arrayidx4, align 2 305 %add5 = add nsw i32 %stride, 1 306 %idxprom6 = sext i32 %add5 to i64 307 %arrayidx7 = getelementptr inbounds i16, i16* %x, i64 %idxprom6 308 %l5 = load i16, i16* %arrayidx7, align 2 309 %add8 = add nsw i32 %stride, 2 310 %idxprom9 = sext i32 %add8 to i64 311 %arrayidx10 = getelementptr inbounds i16, i16* %x, i64 %idxprom9 312 %l6 = load i16, i16* %arrayidx10, align 2 313 %add11 = add nsw i32 %stride, 3 314 %idxprom12 = sext i32 %add11 to i64 315 %l8 = load i16, i16* %y, align 2 316 %arrayidx15 = getelementptr inbounds i16, i16* %y, i64 1 317 %l9 = load i16, i16* %arrayidx15, align 2 318 %arrayidx16 = getelementptr inbounds i16, i16* %y, i64 2 319 %l10 = load i16, i16* %arrayidx16, align 2 320 %arrayidx20 = getelementptr inbounds i16, i16* %y, i64 %idxprom 321 %l12 = load i16, i16* %arrayidx20, align 2 322 %arrayidx23 = getelementptr inbounds i16, i16* %y, i64 %idxprom6 323 %l13 = load i16, i16* %arrayidx23, align 2 324 %arrayidx26 = getelementptr inbounds i16, i16* %y, i64 %idxprom9 325 %l14 = load i16, i16* %arrayidx26, align 2 326 %mul = mul i16 %l8, %l0 327 %mul36 = mul i16 %l9, %l1 328 %add37 = add i16 %mul36, %mul 329 %mul48 = mul i16 %l10, %l2 330 %add49 = add i16 %add37, %mul48 331 %mul54 = mul i16 %l13, %l5 332 %add55 = add i16 %add49, %mul54 333 %mul60 = mul i16 %l12, %l4 334 %add61 = add i16 %add55, %mul60 335 %mul72 = mul i16 %l14, %l6 336 %add73 = add i16 %add61, %mul72 337 ret i16 %add73 338} 339 340define i16 @reduce_blockstrided4(i16* nocapture noundef readonly %x, i16* nocapture noundef readonly %y, i32 noundef %stride) { 341; CHECK-LABEL: @reduce_blockstrided4( 342; CHECK-NEXT: entry: 343; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[STRIDE:%.*]] to i64 344; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i64 [[IDXPROM]] 345; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, i16* [[Y:%.*]], i64 [[IDXPROM]] 346; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[X]] to <4 x i16>* 347; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 2 348; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[ARRAYIDX4]] to <4 x i16>* 349; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[TMP2]], align 2 350; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[Y]] to <4 x i16>* 351; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[TMP4]], align 2 352; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16* [[ARRAYIDX20]] to <4 x i16>* 353; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[TMP6]], align 2 354; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i16> [[TMP5]], [[TMP1]] 355; CHECK-NEXT: [[TMP9:%.*]] = mul <4 x i16> [[TMP7]], [[TMP3]] 356; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 357; CHECK-NEXT: [[TMP11:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[TMP10]]) 358; CHECK-NEXT: ret i16 [[TMP11]] 359; 360entry: 361 %0 = load i16, i16* %x, align 2 362 %arrayidx1 = getelementptr inbounds i16, i16* %x, i64 1 363 %1 = load i16, i16* %arrayidx1, align 2 364 %arrayidx2 = getelementptr inbounds i16, i16* %x, i64 2 365 %2 = load i16, i16* %arrayidx2, align 2 366 %arrayidx3 = getelementptr inbounds i16, i16* %x, i64 3 367 %3 = load i16, i16* %arrayidx3, align 2 368 %idxprom = sext i32 %stride to i64 369 %arrayidx4 = getelementptr inbounds i16, i16* %x, i64 %idxprom 370 %4 = load i16, i16* %arrayidx4, align 2 371 %add5 = add nsw i32 %stride, 1 372 %idxprom6 = sext i32 %add5 to i64 373 %arrayidx7 = getelementptr inbounds i16, i16* %x, i64 %idxprom6 374 %5 = load i16, i16* %arrayidx7, align 2 375 %add8 = add nsw i32 %stride, 2 376 %idxprom9 = sext i32 %add8 to i64 377 %arrayidx10 = getelementptr inbounds i16, i16* %x, i64 %idxprom9 378 %6 = load i16, i16* %arrayidx10, align 2 379 %add11 = add nsw i32 %stride, 3 380 %idxprom12 = sext i32 %add11 to i64 381 %arrayidx13 = getelementptr inbounds i16, i16* %x, i64 %idxprom12 382 %7 = load i16, i16* %arrayidx13, align 2 383 %8 = load i16, i16* %y, align 2 384 %arrayidx15 = getelementptr inbounds i16, i16* %y, i64 1 385 %9 = load i16, i16* %arrayidx15, align 2 386 %arrayidx16 = getelementptr inbounds i16, i16* %y, i64 2 387 %10 = load i16, i16* %arrayidx16, align 2 388 %arrayidx17 = getelementptr inbounds i16, i16* %y, i64 3 389 %11 = load i16, i16* %arrayidx17, align 2 390 %arrayidx20 = getelementptr inbounds i16, i16* %y, i64 %idxprom 391 %12 = load i16, i16* %arrayidx20, align 2 392 %arrayidx23 = getelementptr inbounds i16, i16* %y, i64 %idxprom6 393 %13 = load i16, i16* %arrayidx23, align 2 394 %arrayidx26 = getelementptr inbounds i16, i16* %y, i64 %idxprom9 395 %14 = load i16, i16* %arrayidx26, align 2 396 %arrayidx29 = getelementptr inbounds i16, i16* %y, i64 %idxprom12 397 %15 = load i16, i16* %arrayidx29, align 2 398 %mul = mul i16 %8, %0 399 %mul36 = mul i16 %9, %1 400 %add37 = add i16 %mul36, %mul 401 %mul42 = mul i16 %11, %3 402 %add43 = add i16 %add37, %mul42 403 %mul48 = mul i16 %10, %2 404 %add49 = add i16 %add43, %mul48 405 %mul54 = mul i16 %13, %5 406 %add55 = add i16 %add49, %mul54 407 %mul60 = mul i16 %12, %4 408 %add61 = add i16 %add55, %mul60 409 %mul66 = mul i16 %15, %7 410 %add67 = add i16 %add61, %mul66 411 %mul72 = mul i16 %14, %6 412 %add73 = add i16 %add67, %mul72 413 ret i16 %add73 414} 415 416define i32 @reduce_blockstrided4x4(i8* nocapture noundef readonly %p1, i32 noundef %off1, i8* nocapture noundef readonly %p2, i32 noundef %off2) { 417; CHECK-LABEL: @reduce_blockstrided4x4( 418; CHECK-NEXT: entry: 419; CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[OFF1:%.*]] to i64 420; CHECK-NEXT: [[IDX_EXT63:%.*]] = sext i32 [[OFF2:%.*]] to i64 421; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[P1:%.*]], i64 4 422; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, i8* [[P2:%.*]], i64 4 423; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[P1]], i64 [[IDX_EXT]] 424; CHECK-NEXT: [[ADD_PTR64:%.*]] = getelementptr inbounds i8, i8* [[P2]], i64 [[IDX_EXT63]] 425; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR]], i64 4 426; CHECK-NEXT: [[ARRAYIDX5_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64]], i64 4 427; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[P1]] to <4 x i8>* 428; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* [[TMP0]], align 1 429; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[P2]] to <4 x i8>* 430; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i8>, <4 x i8>* [[TMP2]], align 1 431; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[ARRAYIDX3]] to <4 x i8>* 432; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i8>, <4 x i8>* [[TMP4]], align 1 433; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[ARRAYIDX5]] to <4 x i8>* 434; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i8>, <4 x i8>* [[TMP6]], align 1 435; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[ADD_PTR]] to <4 x i8>* 436; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i8>, <4 x i8>* [[TMP8]], align 1 437; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8* [[ADD_PTR64]] to <4 x i8>* 438; CHECK-NEXT: [[TMP11:%.*]] = load <4 x i8>, <4 x i8>* [[TMP10]], align 1 439; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 440; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 441; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <16 x i8> [[TMP12]], <16 x i8> [[TMP13]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef> 442; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <4 x i8> [[TMP11]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 443; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <16 x i8> [[TMP14]], <16 x i8> [[TMP15]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19> 444; CHECK-NEXT: [[TMP17:%.*]] = zext <16 x i8> [[TMP16]] to <16 x i32> 445; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8* [[ARRAYIDX3_1]] to <4 x i8>* 446; CHECK-NEXT: [[TMP19:%.*]] = load <4 x i8>, <4 x i8>* [[TMP18]], align 1 447; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8* [[ARRAYIDX5_1]] to <4 x i8>* 448; CHECK-NEXT: [[TMP21:%.*]] = load <4 x i8>, <4 x i8>* [[TMP20]], align 1 449; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i8> [[TMP5]], <4 x i8> [[TMP7]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 450; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <4 x i8> [[TMP19]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 451; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <16 x i8> [[TMP22]], <16 x i8> [[TMP23]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef> 452; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i8> [[TMP21]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 453; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <16 x i8> [[TMP24]], <16 x i8> [[TMP25]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19> 454; CHECK-NEXT: [[TMP27:%.*]] = zext <16 x i8> [[TMP26]] to <16 x i32> 455; CHECK-NEXT: [[TMP28:%.*]] = mul nuw nsw <16 x i32> [[TMP17]], [[TMP27]] 456; CHECK-NEXT: [[TMP29:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP28]]) 457; CHECK-NEXT: ret i32 [[TMP29]] 458; 459entry: 460 %idx.ext = sext i32 %off1 to i64 461 %idx.ext63 = sext i32 %off2 to i64 462 463 %0 = load i8, i8* %p1, align 1 464 %conv = zext i8 %0 to i32 465 %1 = load i8, i8* %p2, align 1 466 %conv2 = zext i8 %1 to i32 467 %arrayidx3 = getelementptr inbounds i8, i8* %p1, i64 4 468 %2 = load i8, i8* %arrayidx3, align 1 469 %conv4 = zext i8 %2 to i32 470 %arrayidx5 = getelementptr inbounds i8, i8* %p2, i64 4 471 %3 = load i8, i8* %arrayidx5, align 1 472 %conv6 = zext i8 %3 to i32 473 %arrayidx8 = getelementptr inbounds i8, i8* %p1, i64 1 474 %4 = load i8, i8* %arrayidx8, align 1 475 %conv9 = zext i8 %4 to i32 476 %arrayidx10 = getelementptr inbounds i8, i8* %p2, i64 1 477 %5 = load i8, i8* %arrayidx10, align 1 478 %conv11 = zext i8 %5 to i32 479 %arrayidx13 = getelementptr inbounds i8, i8* %p1, i64 5 480 %6 = load i8, i8* %arrayidx13, align 1 481 %conv14 = zext i8 %6 to i32 482 %arrayidx15 = getelementptr inbounds i8, i8* %p2, i64 5 483 %7 = load i8, i8* %arrayidx15, align 1 484 %conv16 = zext i8 %7 to i32 485 %arrayidx20 = getelementptr inbounds i8, i8* %p1, i64 2 486 %8 = load i8, i8* %arrayidx20, align 1 487 %conv21 = zext i8 %8 to i32 488 %arrayidx22 = getelementptr inbounds i8, i8* %p2, i64 2 489 %9 = load i8, i8* %arrayidx22, align 1 490 %conv23 = zext i8 %9 to i32 491 %arrayidx25 = getelementptr inbounds i8, i8* %p1, i64 6 492 %10 = load i8, i8* %arrayidx25, align 1 493 %conv26 = zext i8 %10 to i32 494 %arrayidx27 = getelementptr inbounds i8, i8* %p2, i64 6 495 %11 = load i8, i8* %arrayidx27, align 1 496 %conv28 = zext i8 %11 to i32 497 %arrayidx32 = getelementptr inbounds i8, i8* %p1, i64 3 498 %12 = load i8, i8* %arrayidx32, align 1 499 %conv33 = zext i8 %12 to i32 500 %arrayidx34 = getelementptr inbounds i8, i8* %p2, i64 3 501 %13 = load i8, i8* %arrayidx34, align 1 502 %conv35 = zext i8 %13 to i32 503 %arrayidx37 = getelementptr inbounds i8, i8* %p1, i64 7 504 %14 = load i8, i8* %arrayidx37, align 1 505 %conv38 = zext i8 %14 to i32 506 %arrayidx39 = getelementptr inbounds i8, i8* %p2, i64 7 507 %15 = load i8, i8* %arrayidx39, align 1 508 %conv40 = zext i8 %15 to i32 509 %add.ptr = getelementptr inbounds i8, i8* %p1, i64 %idx.ext 510 %16 = load i8, i8* %add.ptr, align 1 511 %conv.1 = zext i8 %16 to i32 512 %add.ptr64 = getelementptr inbounds i8, i8* %p2, i64 %idx.ext63 513 %17 = load i8, i8* %add.ptr64, align 1 514 %conv2.1 = zext i8 %17 to i32 515 %arrayidx3.1 = getelementptr inbounds i8, i8* %add.ptr, i64 4 516 %18 = load i8, i8* %arrayidx3.1, align 1 517 %conv4.1 = zext i8 %18 to i32 518 %arrayidx5.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 4 519 %19 = load i8, i8* %arrayidx5.1, align 1 520 %conv6.1 = zext i8 %19 to i32 521 %arrayidx8.1 = getelementptr inbounds i8, i8* %add.ptr, i64 1 522 %20 = load i8, i8* %arrayidx8.1, align 1 523 %conv9.1 = zext i8 %20 to i32 524 %arrayidx10.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 1 525 %21 = load i8, i8* %arrayidx10.1, align 1 526 %conv11.1 = zext i8 %21 to i32 527 %arrayidx13.1 = getelementptr inbounds i8, i8* %add.ptr, i64 5 528 %22 = load i8, i8* %arrayidx13.1, align 1 529 %conv14.1 = zext i8 %22 to i32 530 %arrayidx15.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 5 531 %23 = load i8, i8* %arrayidx15.1, align 1 532 %conv16.1 = zext i8 %23 to i32 533 %arrayidx20.1 = getelementptr inbounds i8, i8* %add.ptr, i64 2 534 %24 = load i8, i8* %arrayidx20.1, align 1 535 %conv21.1 = zext i8 %24 to i32 536 %arrayidx22.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 2 537 %25 = load i8, i8* %arrayidx22.1, align 1 538 %conv23.1 = zext i8 %25 to i32 539 %arrayidx25.1 = getelementptr inbounds i8, i8* %add.ptr, i64 6 540 %26 = load i8, i8* %arrayidx25.1, align 1 541 %conv26.1 = zext i8 %26 to i32 542 %arrayidx27.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 6 543 %27 = load i8, i8* %arrayidx27.1, align 1 544 %conv28.1 = zext i8 %27 to i32 545 %arrayidx32.1 = getelementptr inbounds i8, i8* %add.ptr, i64 3 546 %28 = load i8, i8* %arrayidx32.1, align 1 547 %conv33.1 = zext i8 %28 to i32 548 %arrayidx34.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 3 549 %29 = load i8, i8* %arrayidx34.1, align 1 550 %conv35.1 = zext i8 %29 to i32 551 %arrayidx37.1 = getelementptr inbounds i8, i8* %add.ptr, i64 7 552 %30 = load i8, i8* %arrayidx37.1, align 1 553 %conv38.1 = zext i8 %30 to i32 554 %arrayidx39.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 7 555 %31 = load i8, i8* %arrayidx39.1, align 1 556 %conv40.1 = zext i8 %31 to i32 557 %add.ptr.1 = getelementptr inbounds i8, i8* %add.ptr, i64 %idx.ext 558 %32 = load i8, i8* %add.ptr.1, align 1 559 %conv.2 = zext i8 %32 to i32 560 %add.ptr64.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 %idx.ext63 561 %33 = load i8, i8* %add.ptr64.1, align 1 562 %conv2.2 = zext i8 %33 to i32 563 %arrayidx3.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 4 564 %34 = load i8, i8* %arrayidx3.2, align 1 565 %conv4.2 = zext i8 %34 to i32 566 %arrayidx5.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 4 567 %35 = load i8, i8* %arrayidx5.2, align 1 568 %conv6.2 = zext i8 %35 to i32 569 %arrayidx8.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 1 570 %36 = load i8, i8* %arrayidx8.2, align 1 571 %conv9.2 = zext i8 %36 to i32 572 %arrayidx10.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 1 573 %37 = load i8, i8* %arrayidx10.2, align 1 574 %conv11.2 = zext i8 %37 to i32 575 %arrayidx13.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 5 576 %38 = load i8, i8* %arrayidx13.2, align 1 577 %conv14.2 = zext i8 %38 to i32 578 %arrayidx15.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 5 579 %39 = load i8, i8* %arrayidx15.2, align 1 580 %conv16.2 = zext i8 %39 to i32 581 %arrayidx20.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 2 582 %40 = load i8, i8* %arrayidx20.2, align 1 583 %conv21.2 = zext i8 %40 to i32 584 %arrayidx22.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 2 585 %41 = load i8, i8* %arrayidx22.2, align 1 586 %conv23.2 = zext i8 %41 to i32 587 %arrayidx25.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 6 588 %42 = load i8, i8* %arrayidx25.2, align 1 589 %conv26.2 = zext i8 %42 to i32 590 %arrayidx27.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 6 591 %43 = load i8, i8* %arrayidx27.2, align 1 592 %conv28.2 = zext i8 %43 to i32 593 %arrayidx32.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 3 594 %44 = load i8, i8* %arrayidx32.2, align 1 595 %conv33.2 = zext i8 %44 to i32 596 %arrayidx34.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 3 597 %45 = load i8, i8* %arrayidx34.2, align 1 598 %conv35.2 = zext i8 %45 to i32 599 %arrayidx37.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 7 600 %46 = load i8, i8* %arrayidx37.2, align 1 601 %conv38.2 = zext i8 %46 to i32 602 %arrayidx39.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 7 603 %47 = load i8, i8* %arrayidx39.2, align 1 604 %conv40.2 = zext i8 %47 to i32 605 %add.ptr.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 %idx.ext 606 %48 = load i8, i8* %add.ptr.2, align 1 607 %conv.3 = zext i8 %48 to i32 608 %add.ptr64.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 %idx.ext63 609 %49 = load i8, i8* %add.ptr64.2, align 1 610 %conv2.3 = zext i8 %49 to i32 611 %arrayidx3.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 4 612 %50 = load i8, i8* %arrayidx3.3, align 1 613 %conv4.3 = zext i8 %50 to i32 614 %arrayidx5.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 4 615 %51 = load i8, i8* %arrayidx5.3, align 1 616 %conv6.3 = zext i8 %51 to i32 617 %arrayidx8.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 1 618 %52 = load i8, i8* %arrayidx8.3, align 1 619 %conv9.3 = zext i8 %52 to i32 620 %arrayidx10.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 1 621 %53 = load i8, i8* %arrayidx10.3, align 1 622 %conv11.3 = zext i8 %53 to i32 623 %arrayidx13.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 5 624 %54 = load i8, i8* %arrayidx13.3, align 1 625 %conv14.3 = zext i8 %54 to i32 626 %arrayidx15.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 5 627 %55 = load i8, i8* %arrayidx15.3, align 1 628 %conv16.3 = zext i8 %55 to i32 629 %arrayidx20.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 2 630 %56 = load i8, i8* %arrayidx20.3, align 1 631 %conv21.3 = zext i8 %56 to i32 632 %arrayidx22.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 2 633 %57 = load i8, i8* %arrayidx22.3, align 1 634 %conv23.3 = zext i8 %57 to i32 635 %arrayidx25.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 6 636 %58 = load i8, i8* %arrayidx25.3, align 1 637 %conv26.3 = zext i8 %58 to i32 638 %arrayidx27.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 6 639 %59 = load i8, i8* %arrayidx27.3, align 1 640 %conv28.3 = zext i8 %59 to i32 641 %arrayidx32.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 3 642 %60 = load i8, i8* %arrayidx32.3, align 1 643 %conv33.3 = zext i8 %60 to i32 644 %arrayidx34.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 3 645 %61 = load i8, i8* %arrayidx34.3, align 1 646 %conv35.3 = zext i8 %61 to i32 647 %arrayidx37.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 7 648 %62 = load i8, i8* %arrayidx37.3, align 1 649 %conv38.3 = zext i8 %62 to i32 650 %arrayidx39.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 7 651 %63 = load i8, i8* %arrayidx39.3, align 1 652 %conv40.3 = zext i8 %63 to i32 653 654 %m1 = mul i32 %conv, %conv4 655 %m2 = mul i32 %conv9, %conv14 656 %m3 = mul i32 %conv21, %conv26 657 %m4 = mul i32 %conv33, %conv38 658 %m8 = mul i32 %conv2, %conv6 659 %m7 = mul i32 %conv11, %conv16 660 %m6 = mul i32 %conv23, %conv28 661 %m5 = mul i32 %conv35, %conv40 662 %m9 = mul i32 %conv.1, %conv4.1 663 %m10 = mul i32 %conv9.1, %conv14.1 664 %m11 = mul i32 %conv21.1, %conv26.1 665 %m12 = mul i32 %conv33.1, %conv38.1 666 %m16 = mul i32 %conv2.1, %conv6.1 667 %m15 = mul i32 %conv11.1, %conv16.1 668 %m14 = mul i32 %conv23.1, %conv28.1 669 %m13 = mul i32 %conv35.1, %conv40.1 670 671 %a2 = add i32 %m1, %m2 672 %a3 = add i32 %a2, %m3 673 %a4 = add i32 %a3, %m4 674 %a5 = add i32 %a4, %m5 675 %a6 = add i32 %a5, %m6 676 %a7 = add i32 %a6, %m7 677 %a8 = add i32 %a7, %m8 678 %a9 = add i32 %a8, %m9 679 %a10 = add i32 %a9, %m10 680 %a11 = add i32 %a10, %m11 681 %a12 = add i32 %a11, %m12 682 %a13 = add i32 %a12, %m13 683 %a14 = add i32 %a13, %m14 684 %a15 = add i32 %a14, %m15 685 %a16 = add i32 %a15, %m16 686 ret i32 %a16 687} 688 689define void @store_blockstrided3(i32* nocapture noundef readonly %x, i32* nocapture noundef readonly %y, i32* nocapture noundef writeonly %z, i32 noundef %stride) { 690; CHECK-LABEL: @store_blockstrided3( 691; CHECK-NEXT: entry: 692; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 2 693; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 694; CHECK-NEXT: [[ADD4:%.*]] = add nsw i32 [[STRIDE:%.*]], 1 695; CHECK-NEXT: [[IDXPROM5:%.*]] = sext i32 [[ADD4]] to i64 696; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[IDXPROM5]] 697; CHECK-NEXT: [[MUL:%.*]] = shl nsw i32 [[STRIDE]], 1 698; CHECK-NEXT: [[IDXPROM11:%.*]] = sext i32 [[MUL]] to i64 699; CHECK-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[IDXPROM11]] 700; CHECK-NEXT: [[ADD18:%.*]] = add nsw i32 [[MUL]], 2 701; CHECK-NEXT: [[IDXPROM19:%.*]] = sext i32 [[ADD18]] to i64 702; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[IDXPROM19]] 703; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX20]], align 4 704; CHECK-NEXT: [[MUL21:%.*]] = mul nsw i32 [[STRIDE]], 3 705; CHECK-NEXT: [[IDXPROM23:%.*]] = sext i32 [[MUL21]] to i64 706; CHECK-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[IDXPROM23]] 707; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX24]], align 4 708; CHECK-NEXT: [[ADD26:%.*]] = add nsw i32 [[MUL21]], 1 709; CHECK-NEXT: [[IDXPROM27:%.*]] = sext i32 [[ADD26]] to i64 710; CHECK-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[IDXPROM27]] 711; CHECK-NEXT: [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, i32* [[Y:%.*]], i64 2 712; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX35]], align 4 713; CHECK-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds i32, i32* [[Y]], i64 [[IDXPROM5]] 714; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds i32, i32* [[Y]], i64 [[IDXPROM11]] 715; CHECK-NEXT: [[ARRAYIDX56:%.*]] = getelementptr inbounds i32, i32* [[Y]], i64 [[IDXPROM19]] 716; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX56]], align 4 717; CHECK-NEXT: [[ARRAYIDX60:%.*]] = getelementptr inbounds i32, i32* [[Y]], i64 [[IDXPROM23]] 718; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX60]], align 4 719; CHECK-NEXT: [[ARRAYIDX64:%.*]] = getelementptr inbounds i32, i32* [[Y]], i64 [[IDXPROM27]] 720; CHECK-NEXT: [[ARRAYIDX72:%.*]] = getelementptr inbounds i32, i32* [[Z:%.*]], i64 1 721; CHECK-NEXT: [[MUL73:%.*]] = mul nsw i32 [[TMP3]], [[TMP0]] 722; CHECK-NEXT: [[ARRAYIDX76:%.*]] = getelementptr inbounds i32, i32* [[Z]], i64 6 723; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[X]] to <2 x i32>* 724; CHECK-NEXT: [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[TMP6]], align 4 725; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[ARRAYIDX6]] to <2 x i32>* 726; CHECK-NEXT: [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[TMP8]], align 4 727; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[Y]] to <2 x i32>* 728; CHECK-NEXT: [[TMP11:%.*]] = load <2 x i32>, <2 x i32>* [[TMP10]], align 4 729; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[ARRAYIDX41]] to <2 x i32>* 730; CHECK-NEXT: [[TMP13:%.*]] = load <2 x i32>, <2 x i32>* [[TMP12]], align 4 731; CHECK-NEXT: [[TMP14:%.*]] = mul nsw <2 x i32> [[TMP11]], [[TMP7]] 732; CHECK-NEXT: [[TMP15:%.*]] = mul nsw <2 x i32> [[TMP13]], [[TMP9]] 733; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], <4 x i32> <i32 1, i32 0, i32 3, i32 2> 734; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32* [[ARRAYIDX72]] to <4 x i32>* 735; CHECK-NEXT: [[ARRAYIDX84:%.*]] = getelementptr inbounds i32, i32* [[Z]], i64 7 736; CHECK-NEXT: [[MUL85:%.*]] = mul nsw i32 [[TMP4]], [[TMP1]] 737; CHECK-NEXT: [[MUL87:%.*]] = mul nsw i32 [[TMP5]], [[TMP2]] 738; CHECK-NEXT: [[ARRAYIDX88:%.*]] = getelementptr inbounds i32, i32* [[Z]], i64 11 739; CHECK-NEXT: [[TMP17:%.*]] = bitcast i32* [[ARRAYIDX12]] to <2 x i32>* 740; CHECK-NEXT: [[TMP18:%.*]] = load <2 x i32>, <2 x i32>* [[TMP17]], align 4 741; CHECK-NEXT: [[TMP19:%.*]] = bitcast i32* [[ARRAYIDX28]] to <2 x i32>* 742; CHECK-NEXT: [[TMP20:%.*]] = load <2 x i32>, <2 x i32>* [[TMP19]], align 4 743; CHECK-NEXT: [[TMP21:%.*]] = bitcast i32* [[ARRAYIDX48]] to <2 x i32>* 744; CHECK-NEXT: [[TMP22:%.*]] = load <2 x i32>, <2 x i32>* [[TMP21]], align 4 745; CHECK-NEXT: [[TMP23:%.*]] = bitcast i32* [[ARRAYIDX64]] to <2 x i32>* 746; CHECK-NEXT: [[TMP24:%.*]] = load <2 x i32>, <2 x i32>* [[TMP23]], align 4 747; CHECK-NEXT: store i32 [[MUL73]], i32* [[Z]], align 4 748; CHECK-NEXT: store <4 x i32> [[SHUFFLE]], <4 x i32>* [[TMP16]], align 4 749; CHECK-NEXT: store i32 [[MUL85]], i32* [[ARRAYIDX76]], align 4 750; CHECK-NEXT: store i32 [[MUL87]], i32* [[ARRAYIDX88]], align 4 751; CHECK-NEXT: [[TMP25:%.*]] = mul nsw <2 x i32> [[TMP22]], [[TMP18]] 752; CHECK-NEXT: [[TMP26:%.*]] = mul nsw <2 x i32> [[TMP24]], [[TMP20]] 753; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <2 x i32> [[TMP25]], <2 x i32> [[TMP26]], <4 x i32> <i32 1, i32 0, i32 3, i32 2> 754; CHECK-NEXT: [[TMP27:%.*]] = bitcast i32* [[ARRAYIDX84]] to <4 x i32>* 755; CHECK-NEXT: store <4 x i32> [[SHUFFLE1]], <4 x i32>* [[TMP27]], align 4 756; CHECK-NEXT: ret void 757; 758entry: 759 %0 = load i32, i32* %x, align 4 760 %arrayidx1 = getelementptr inbounds i32, i32* %x, i64 1 761 %1 = load i32, i32* %arrayidx1, align 4 762 %arrayidx2 = getelementptr inbounds i32, i32* %x, i64 2 763 %2 = load i32, i32* %arrayidx2, align 4 764 %add4 = add nsw i32 %stride, 1 765 %idxprom5 = sext i32 %add4 to i64 766 %arrayidx6 = getelementptr inbounds i32, i32* %x, i64 %idxprom5 767 %3 = load i32, i32* %arrayidx6, align 4 768 %add7 = add nsw i32 %stride, 2 769 %idxprom8 = sext i32 %add7 to i64 770 %arrayidx9 = getelementptr inbounds i32, i32* %x, i64 %idxprom8 771 %4 = load i32, i32* %arrayidx9, align 4 772 %mul = shl nsw i32 %stride, 1 773 %idxprom11 = sext i32 %mul to i64 774 %arrayidx12 = getelementptr inbounds i32, i32* %x, i64 %idxprom11 775 %5 = load i32, i32* %arrayidx12, align 4 776 %add14 = or i32 %mul, 1 777 %idxprom15 = sext i32 %add14 to i64 778 %arrayidx16 = getelementptr inbounds i32, i32* %x, i64 %idxprom15 779 %6 = load i32, i32* %arrayidx16, align 4 780 %add18 = add nsw i32 %mul, 2 781 %idxprom19 = sext i32 %add18 to i64 782 %arrayidx20 = getelementptr inbounds i32, i32* %x, i64 %idxprom19 783 %7 = load i32, i32* %arrayidx20, align 4 784 %mul21 = mul nsw i32 %stride, 3 785 %idxprom23 = sext i32 %mul21 to i64 786 %arrayidx24 = getelementptr inbounds i32, i32* %x, i64 %idxprom23 787 %8 = load i32, i32* %arrayidx24, align 4 788 %add26 = add nsw i32 %mul21, 1 789 %idxprom27 = sext i32 %add26 to i64 790 %arrayidx28 = getelementptr inbounds i32, i32* %x, i64 %idxprom27 791 %9 = load i32, i32* %arrayidx28, align 4 792 %add30 = add nsw i32 %mul21, 2 793 %idxprom31 = sext i32 %add30 to i64 794 %arrayidx32 = getelementptr inbounds i32, i32* %x, i64 %idxprom31 795 %10 = load i32, i32* %arrayidx32, align 4 796 %11 = load i32, i32* %y, align 4 797 %arrayidx34 = getelementptr inbounds i32, i32* %y, i64 1 798 %12 = load i32, i32* %arrayidx34, align 4 799 %arrayidx35 = getelementptr inbounds i32, i32* %y, i64 2 800 %13 = load i32, i32* %arrayidx35, align 4 801 %arrayidx41 = getelementptr inbounds i32, i32* %y, i64 %idxprom5 802 %14 = load i32, i32* %arrayidx41, align 4 803 %arrayidx44 = getelementptr inbounds i32, i32* %y, i64 %idxprom8 804 %15 = load i32, i32* %arrayidx44, align 4 805 %arrayidx48 = getelementptr inbounds i32, i32* %y, i64 %idxprom11 806 %16 = load i32, i32* %arrayidx48, align 4 807 %arrayidx52 = getelementptr inbounds i32, i32* %y, i64 %idxprom15 808 %17 = load i32, i32* %arrayidx52, align 4 809 %arrayidx56 = getelementptr inbounds i32, i32* %y, i64 %idxprom19 810 %18 = load i32, i32* %arrayidx56, align 4 811 %arrayidx60 = getelementptr inbounds i32, i32* %y, i64 %idxprom23 812 %19 = load i32, i32* %arrayidx60, align 4 813 %arrayidx64 = getelementptr inbounds i32, i32* %y, i64 %idxprom27 814 %20 = load i32, i32* %arrayidx64, align 4 815 %arrayidx68 = getelementptr inbounds i32, i32* %y, i64 %idxprom31 816 %21 = load i32, i32* %arrayidx68, align 4 817 %mul69 = mul nsw i32 %11, %0 818 %arrayidx70 = getelementptr inbounds i32, i32* %z, i64 2 819 store i32 %mul69, i32* %arrayidx70, align 4 820 %mul71 = mul nsw i32 %12, %1 821 %arrayidx72 = getelementptr inbounds i32, i32* %z, i64 1 822 store i32 %mul71, i32* %arrayidx72, align 4 823 %mul73 = mul nsw i32 %13, %2 824 store i32 %mul73, i32* %z, align 4 825 %arrayidx76 = getelementptr inbounds i32, i32* %z, i64 6 826 %mul77 = mul nsw i32 %14, %3 827 %arrayidx78 = getelementptr inbounds i32, i32* %z, i64 4 828 store i32 %mul77, i32* %arrayidx78, align 4 829 %mul79 = mul nsw i32 %15, %4 830 %arrayidx80 = getelementptr inbounds i32, i32* %z, i64 3 831 store i32 %mul79, i32* %arrayidx80, align 4 832 %mul81 = mul nsw i32 %16, %5 833 %arrayidx82 = getelementptr inbounds i32, i32* %z, i64 8 834 store i32 %mul81, i32* %arrayidx82, align 4 835 %mul83 = mul nsw i32 %17, %6 836 %arrayidx84 = getelementptr inbounds i32, i32* %z, i64 7 837 store i32 %mul83, i32* %arrayidx84, align 4 838 %mul85 = mul nsw i32 %18, %7 839 store i32 %mul85, i32* %arrayidx76, align 4 840 %mul87 = mul nsw i32 %19, %8 841 %arrayidx88 = getelementptr inbounds i32, i32* %z, i64 11 842 store i32 %mul87, i32* %arrayidx88, align 4 843 %mul89 = mul nsw i32 %20, %9 844 %arrayidx90 = getelementptr inbounds i32, i32* %z, i64 10 845 store i32 %mul89, i32* %arrayidx90, align 4 846 %mul91 = mul nsw i32 %21, %10 847 %arrayidx92 = getelementptr inbounds i32, i32* %z, i64 9 848 store i32 %mul91, i32* %arrayidx92, align 4 849 ret void 850} 851 852define void @store_blockstrided4(i16* nocapture noundef readonly %x, i16* nocapture noundef readonly %y, i32 noundef %stride, i16 *%dst0) { 853; CHECK-LABEL: @store_blockstrided4( 854; CHECK-NEXT: entry: 855; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[STRIDE:%.*]] to i64 856; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i64 [[IDXPROM]] 857; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, i16* [[Y:%.*]], i64 [[IDXPROM]] 858; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[X]] to <4 x i16>* 859; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 2 860; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16* [[ARRAYIDX4]] to <4 x i16>* 861; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[TMP2]], align 2 862; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[Y]] to <4 x i16>* 863; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[TMP4]], align 2 864; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16* [[ARRAYIDX20]] to <4 x i16>* 865; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[TMP6]], align 2 866; CHECK-NEXT: [[TMP8:%.*]] = mul <4 x i16> [[TMP5]], [[TMP1]] 867; CHECK-NEXT: [[TMP9:%.*]] = mul <4 x i16> [[TMP7]], [[TMP3]] 868; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6> 869; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16* [[DST0:%.*]] to <8 x i16>* 870; CHECK-NEXT: store <8 x i16> [[SHUFFLE]], <8 x i16>* [[TMP10]], align 2 871; CHECK-NEXT: ret void 872; 873entry: 874 %0 = load i16, i16* %x, align 2 875 %arrayidx1 = getelementptr inbounds i16, i16* %x, i64 1 876 %1 = load i16, i16* %arrayidx1, align 2 877 %arrayidx2 = getelementptr inbounds i16, i16* %x, i64 2 878 %2 = load i16, i16* %arrayidx2, align 2 879 %arrayidx3 = getelementptr inbounds i16, i16* %x, i64 3 880 %3 = load i16, i16* %arrayidx3, align 2 881 %idxprom = sext i32 %stride to i64 882 %arrayidx4 = getelementptr inbounds i16, i16* %x, i64 %idxprom 883 %4 = load i16, i16* %arrayidx4, align 2 884 %add5 = add nsw i32 %stride, 1 885 %idxprom6 = sext i32 %add5 to i64 886 %arrayidx7 = getelementptr inbounds i16, i16* %x, i64 %idxprom6 887 %5 = load i16, i16* %arrayidx7, align 2 888 %add8 = add nsw i32 %stride, 2 889 %idxprom9 = sext i32 %add8 to i64 890 %arrayidx10 = getelementptr inbounds i16, i16* %x, i64 %idxprom9 891 %6 = load i16, i16* %arrayidx10, align 2 892 %add11 = add nsw i32 %stride, 3 893 %idxprom12 = sext i32 %add11 to i64 894 %arrayidx13 = getelementptr inbounds i16, i16* %x, i64 %idxprom12 895 %7 = load i16, i16* %arrayidx13, align 2 896 %8 = load i16, i16* %y, align 2 897 %arrayidx15 = getelementptr inbounds i16, i16* %y, i64 1 898 %9 = load i16, i16* %arrayidx15, align 2 899 %arrayidx16 = getelementptr inbounds i16, i16* %y, i64 2 900 %10 = load i16, i16* %arrayidx16, align 2 901 %arrayidx17 = getelementptr inbounds i16, i16* %y, i64 3 902 %11 = load i16, i16* %arrayidx17, align 2 903 %arrayidx20 = getelementptr inbounds i16, i16* %y, i64 %idxprom 904 %12 = load i16, i16* %arrayidx20, align 2 905 %arrayidx23 = getelementptr inbounds i16, i16* %y, i64 %idxprom6 906 %13 = load i16, i16* %arrayidx23, align 2 907 %arrayidx26 = getelementptr inbounds i16, i16* %y, i64 %idxprom9 908 %14 = load i16, i16* %arrayidx26, align 2 909 %arrayidx29 = getelementptr inbounds i16, i16* %y, i64 %idxprom12 910 %15 = load i16, i16* %arrayidx29, align 2 911 %mul = mul i16 %8, %0 912 %mul36 = mul i16 %9, %1 913 %mul42 = mul i16 %11, %3 914 %mul48 = mul i16 %10, %2 915 %mul54 = mul i16 %13, %5 916 %mul60 = mul i16 %12, %4 917 %mul66 = mul i16 %15, %7 918 %mul72 = mul i16 %14, %6 919 %dst1 = getelementptr inbounds i16, i16* %dst0, i64 1 920 %dst2 = getelementptr inbounds i16, i16* %dst0, i64 2 921 %dst3 = getelementptr inbounds i16, i16* %dst0, i64 3 922 %dst4 = getelementptr inbounds i16, i16* %dst0, i64 4 923 %dst5 = getelementptr inbounds i16, i16* %dst0, i64 5 924 %dst6 = getelementptr inbounds i16, i16* %dst0, i64 6 925 %dst7 = getelementptr inbounds i16, i16* %dst0, i64 7 926 store i16 %mul, i16* %dst0 927 store i16 %mul36, i16* %dst1 928 store i16 %mul42, i16* %dst2 929 store i16 %mul48, i16* %dst3 930 store i16 %mul54, i16* %dst4 931 store i16 %mul60, i16* %dst5 932 store i16 %mul66, i16* %dst6 933 store i16 %mul72, i16* %dst7 934 ret void 935} 936 937define void @store_blockstrided4x4(i8* nocapture noundef readonly %p1, i32 noundef %off1, i8* nocapture noundef readonly %p2, i32 noundef %off2, i32 *%dst0) { 938; CHECK-LABEL: @store_blockstrided4x4( 939; CHECK-NEXT: entry: 940; CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[OFF1:%.*]] to i64 941; CHECK-NEXT: [[IDX_EXT63:%.*]] = sext i32 [[OFF2:%.*]] to i64 942; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[P1:%.*]], i64 4 943; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, i8* [[P2:%.*]], i64 4 944; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[P1]], i64 [[IDX_EXT]] 945; CHECK-NEXT: [[ADD_PTR64:%.*]] = getelementptr inbounds i8, i8* [[P2]], i64 [[IDX_EXT63]] 946; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR]], i64 4 947; CHECK-NEXT: [[ARRAYIDX5_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64]], i64 4 948; CHECK-NEXT: [[DST4:%.*]] = getelementptr inbounds i32, i32* [[DST0:%.*]], i64 4 949; CHECK-NEXT: [[DST8:%.*]] = getelementptr inbounds i32, i32* [[DST0]], i64 8 950; CHECK-NEXT: [[DST12:%.*]] = getelementptr inbounds i32, i32* [[DST0]], i64 12 951; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[P1]] to <4 x i8>* 952; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* [[TMP0]], align 1 953; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i32> 954; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[ARRAYIDX3]] to <4 x i8>* 955; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i8>, <4 x i8>* [[TMP3]], align 1 956; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32> 957; CHECK-NEXT: [[TMP6:%.*]] = mul nuw nsw <4 x i32> [[TMP2]], [[TMP5]] 958; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[DST0]] to <4 x i32>* 959; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[P2]] to <4 x i8>* 960; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i8>, <4 x i8>* [[TMP8]], align 1 961; CHECK-NEXT: [[TMP10:%.*]] = zext <4 x i8> [[TMP9]] to <4 x i32> 962; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8* [[ARRAYIDX5]] to <4 x i8>* 963; CHECK-NEXT: [[TMP12:%.*]] = load <4 x i8>, <4 x i8>* [[TMP11]], align 1 964; CHECK-NEXT: [[TMP13:%.*]] = zext <4 x i8> [[TMP12]] to <4 x i32> 965; CHECK-NEXT: [[TMP14:%.*]] = mul nuw nsw <4 x i32> [[TMP10]], [[TMP13]] 966; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32* [[DST4]] to <4 x i32>* 967; CHECK-NEXT: [[TMP16:%.*]] = bitcast i8* [[ADD_PTR]] to <4 x i8>* 968; CHECK-NEXT: [[TMP17:%.*]] = load <4 x i8>, <4 x i8>* [[TMP16]], align 1 969; CHECK-NEXT: [[TMP18:%.*]] = zext <4 x i8> [[TMP17]] to <4 x i32> 970; CHECK-NEXT: [[TMP19:%.*]] = bitcast i8* [[ARRAYIDX3_1]] to <4 x i8>* 971; CHECK-NEXT: [[TMP20:%.*]] = load <4 x i8>, <4 x i8>* [[TMP19]], align 1 972; CHECK-NEXT: [[TMP21:%.*]] = zext <4 x i8> [[TMP20]] to <4 x i32> 973; CHECK-NEXT: [[TMP22:%.*]] = mul nuw nsw <4 x i32> [[TMP18]], [[TMP21]] 974; CHECK-NEXT: [[TMP23:%.*]] = bitcast i32* [[DST8]] to <4 x i32>* 975; CHECK-NEXT: [[TMP24:%.*]] = bitcast i8* [[ADD_PTR64]] to <4 x i8>* 976; CHECK-NEXT: [[TMP25:%.*]] = load <4 x i8>, <4 x i8>* [[TMP24]], align 1 977; CHECK-NEXT: [[TMP26:%.*]] = zext <4 x i8> [[TMP25]] to <4 x i32> 978; CHECK-NEXT: [[TMP27:%.*]] = bitcast i8* [[ARRAYIDX5_1]] to <4 x i8>* 979; CHECK-NEXT: [[TMP28:%.*]] = load <4 x i8>, <4 x i8>* [[TMP27]], align 1 980; CHECK-NEXT: [[TMP29:%.*]] = zext <4 x i8> [[TMP28]] to <4 x i32> 981; CHECK-NEXT: [[TMP30:%.*]] = mul nuw nsw <4 x i32> [[TMP26]], [[TMP29]] 982; CHECK-NEXT: store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4 983; CHECK-NEXT: store <4 x i32> [[TMP14]], <4 x i32>* [[TMP15]], align 4 984; CHECK-NEXT: store <4 x i32> [[TMP22]], <4 x i32>* [[TMP23]], align 4 985; CHECK-NEXT: [[TMP31:%.*]] = bitcast i32* [[DST12]] to <4 x i32>* 986; CHECK-NEXT: store <4 x i32> [[TMP30]], <4 x i32>* [[TMP31]], align 4 987; CHECK-NEXT: ret void 988; 989entry: 990 %idx.ext = sext i32 %off1 to i64 991 %idx.ext63 = sext i32 %off2 to i64 992 993 %0 = load i8, i8* %p1, align 1 994 %conv = zext i8 %0 to i32 995 %1 = load i8, i8* %p2, align 1 996 %conv2 = zext i8 %1 to i32 997 %arrayidx3 = getelementptr inbounds i8, i8* %p1, i64 4 998 %2 = load i8, i8* %arrayidx3, align 1 999 %conv4 = zext i8 %2 to i32 1000 %arrayidx5 = getelementptr inbounds i8, i8* %p2, i64 4 1001 %3 = load i8, i8* %arrayidx5, align 1 1002 %conv6 = zext i8 %3 to i32 1003 %arrayidx8 = getelementptr inbounds i8, i8* %p1, i64 1 1004 %4 = load i8, i8* %arrayidx8, align 1 1005 %conv9 = zext i8 %4 to i32 1006 %arrayidx10 = getelementptr inbounds i8, i8* %p2, i64 1 1007 %5 = load i8, i8* %arrayidx10, align 1 1008 %conv11 = zext i8 %5 to i32 1009 %arrayidx13 = getelementptr inbounds i8, i8* %p1, i64 5 1010 %6 = load i8, i8* %arrayidx13, align 1 1011 %conv14 = zext i8 %6 to i32 1012 %arrayidx15 = getelementptr inbounds i8, i8* %p2, i64 5 1013 %7 = load i8, i8* %arrayidx15, align 1 1014 %conv16 = zext i8 %7 to i32 1015 %arrayidx20 = getelementptr inbounds i8, i8* %p1, i64 2 1016 %8 = load i8, i8* %arrayidx20, align 1 1017 %conv21 = zext i8 %8 to i32 1018 %arrayidx22 = getelementptr inbounds i8, i8* %p2, i64 2 1019 %9 = load i8, i8* %arrayidx22, align 1 1020 %conv23 = zext i8 %9 to i32 1021 %arrayidx25 = getelementptr inbounds i8, i8* %p1, i64 6 1022 %10 = load i8, i8* %arrayidx25, align 1 1023 %conv26 = zext i8 %10 to i32 1024 %arrayidx27 = getelementptr inbounds i8, i8* %p2, i64 6 1025 %11 = load i8, i8* %arrayidx27, align 1 1026 %conv28 = zext i8 %11 to i32 1027 %arrayidx32 = getelementptr inbounds i8, i8* %p1, i64 3 1028 %12 = load i8, i8* %arrayidx32, align 1 1029 %conv33 = zext i8 %12 to i32 1030 %arrayidx34 = getelementptr inbounds i8, i8* %p2, i64 3 1031 %13 = load i8, i8* %arrayidx34, align 1 1032 %conv35 = zext i8 %13 to i32 1033 %arrayidx37 = getelementptr inbounds i8, i8* %p1, i64 7 1034 %14 = load i8, i8* %arrayidx37, align 1 1035 %conv38 = zext i8 %14 to i32 1036 %arrayidx39 = getelementptr inbounds i8, i8* %p2, i64 7 1037 %15 = load i8, i8* %arrayidx39, align 1 1038 %conv40 = zext i8 %15 to i32 1039 %add.ptr = getelementptr inbounds i8, i8* %p1, i64 %idx.ext 1040 %16 = load i8, i8* %add.ptr, align 1 1041 %conv.1 = zext i8 %16 to i32 1042 %add.ptr64 = getelementptr inbounds i8, i8* %p2, i64 %idx.ext63 1043 %17 = load i8, i8* %add.ptr64, align 1 1044 %conv2.1 = zext i8 %17 to i32 1045 %arrayidx3.1 = getelementptr inbounds i8, i8* %add.ptr, i64 4 1046 %18 = load i8, i8* %arrayidx3.1, align 1 1047 %conv4.1 = zext i8 %18 to i32 1048 %arrayidx5.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 4 1049 %19 = load i8, i8* %arrayidx5.1, align 1 1050 %conv6.1 = zext i8 %19 to i32 1051 %arrayidx8.1 = getelementptr inbounds i8, i8* %add.ptr, i64 1 1052 %20 = load i8, i8* %arrayidx8.1, align 1 1053 %conv9.1 = zext i8 %20 to i32 1054 %arrayidx10.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 1 1055 %21 = load i8, i8* %arrayidx10.1, align 1 1056 %conv11.1 = zext i8 %21 to i32 1057 %arrayidx13.1 = getelementptr inbounds i8, i8* %add.ptr, i64 5 1058 %22 = load i8, i8* %arrayidx13.1, align 1 1059 %conv14.1 = zext i8 %22 to i32 1060 %arrayidx15.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 5 1061 %23 = load i8, i8* %arrayidx15.1, align 1 1062 %conv16.1 = zext i8 %23 to i32 1063 %arrayidx20.1 = getelementptr inbounds i8, i8* %add.ptr, i64 2 1064 %24 = load i8, i8* %arrayidx20.1, align 1 1065 %conv21.1 = zext i8 %24 to i32 1066 %arrayidx22.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 2 1067 %25 = load i8, i8* %arrayidx22.1, align 1 1068 %conv23.1 = zext i8 %25 to i32 1069 %arrayidx25.1 = getelementptr inbounds i8, i8* %add.ptr, i64 6 1070 %26 = load i8, i8* %arrayidx25.1, align 1 1071 %conv26.1 = zext i8 %26 to i32 1072 %arrayidx27.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 6 1073 %27 = load i8, i8* %arrayidx27.1, align 1 1074 %conv28.1 = zext i8 %27 to i32 1075 %arrayidx32.1 = getelementptr inbounds i8, i8* %add.ptr, i64 3 1076 %28 = load i8, i8* %arrayidx32.1, align 1 1077 %conv33.1 = zext i8 %28 to i32 1078 %arrayidx34.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 3 1079 %29 = load i8, i8* %arrayidx34.1, align 1 1080 %conv35.1 = zext i8 %29 to i32 1081 %arrayidx37.1 = getelementptr inbounds i8, i8* %add.ptr, i64 7 1082 %30 = load i8, i8* %arrayidx37.1, align 1 1083 %conv38.1 = zext i8 %30 to i32 1084 %arrayidx39.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 7 1085 %31 = load i8, i8* %arrayidx39.1, align 1 1086 %conv40.1 = zext i8 %31 to i32 1087 %add.ptr.1 = getelementptr inbounds i8, i8* %add.ptr, i64 %idx.ext 1088 %32 = load i8, i8* %add.ptr.1, align 1 1089 %conv.2 = zext i8 %32 to i32 1090 %add.ptr64.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 %idx.ext63 1091 %33 = load i8, i8* %add.ptr64.1, align 1 1092 %conv2.2 = zext i8 %33 to i32 1093 %arrayidx3.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 4 1094 %34 = load i8, i8* %arrayidx3.2, align 1 1095 %conv4.2 = zext i8 %34 to i32 1096 %arrayidx5.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 4 1097 %35 = load i8, i8* %arrayidx5.2, align 1 1098 %conv6.2 = zext i8 %35 to i32 1099 %arrayidx8.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 1 1100 %36 = load i8, i8* %arrayidx8.2, align 1 1101 %conv9.2 = zext i8 %36 to i32 1102 %arrayidx10.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 1 1103 %37 = load i8, i8* %arrayidx10.2, align 1 1104 %conv11.2 = zext i8 %37 to i32 1105 %arrayidx13.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 5 1106 %38 = load i8, i8* %arrayidx13.2, align 1 1107 %conv14.2 = zext i8 %38 to i32 1108 %arrayidx15.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 5 1109 %39 = load i8, i8* %arrayidx15.2, align 1 1110 %conv16.2 = zext i8 %39 to i32 1111 %arrayidx20.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 2 1112 %40 = load i8, i8* %arrayidx20.2, align 1 1113 %conv21.2 = zext i8 %40 to i32 1114 %arrayidx22.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 2 1115 %41 = load i8, i8* %arrayidx22.2, align 1 1116 %conv23.2 = zext i8 %41 to i32 1117 %arrayidx25.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 6 1118 %42 = load i8, i8* %arrayidx25.2, align 1 1119 %conv26.2 = zext i8 %42 to i32 1120 %arrayidx27.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 6 1121 %43 = load i8, i8* %arrayidx27.2, align 1 1122 %conv28.2 = zext i8 %43 to i32 1123 %arrayidx32.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 3 1124 %44 = load i8, i8* %arrayidx32.2, align 1 1125 %conv33.2 = zext i8 %44 to i32 1126 %arrayidx34.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 3 1127 %45 = load i8, i8* %arrayidx34.2, align 1 1128 %conv35.2 = zext i8 %45 to i32 1129 %arrayidx37.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 7 1130 %46 = load i8, i8* %arrayidx37.2, align 1 1131 %conv38.2 = zext i8 %46 to i32 1132 %arrayidx39.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 7 1133 %47 = load i8, i8* %arrayidx39.2, align 1 1134 %conv40.2 = zext i8 %47 to i32 1135 %add.ptr.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 %idx.ext 1136 %48 = load i8, i8* %add.ptr.2, align 1 1137 %conv.3 = zext i8 %48 to i32 1138 %add.ptr64.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 %idx.ext63 1139 %49 = load i8, i8* %add.ptr64.2, align 1 1140 %conv2.3 = zext i8 %49 to i32 1141 %arrayidx3.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 4 1142 %50 = load i8, i8* %arrayidx3.3, align 1 1143 %conv4.3 = zext i8 %50 to i32 1144 %arrayidx5.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 4 1145 %51 = load i8, i8* %arrayidx5.3, align 1 1146 %conv6.3 = zext i8 %51 to i32 1147 %arrayidx8.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 1 1148 %52 = load i8, i8* %arrayidx8.3, align 1 1149 %conv9.3 = zext i8 %52 to i32 1150 %arrayidx10.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 1 1151 %53 = load i8, i8* %arrayidx10.3, align 1 1152 %conv11.3 = zext i8 %53 to i32 1153 %arrayidx13.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 5 1154 %54 = load i8, i8* %arrayidx13.3, align 1 1155 %conv14.3 = zext i8 %54 to i32 1156 %arrayidx15.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 5 1157 %55 = load i8, i8* %arrayidx15.3, align 1 1158 %conv16.3 = zext i8 %55 to i32 1159 %arrayidx20.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 2 1160 %56 = load i8, i8* %arrayidx20.3, align 1 1161 %conv21.3 = zext i8 %56 to i32 1162 %arrayidx22.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 2 1163 %57 = load i8, i8* %arrayidx22.3, align 1 1164 %conv23.3 = zext i8 %57 to i32 1165 %arrayidx25.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 6 1166 %58 = load i8, i8* %arrayidx25.3, align 1 1167 %conv26.3 = zext i8 %58 to i32 1168 %arrayidx27.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 6 1169 %59 = load i8, i8* %arrayidx27.3, align 1 1170 %conv28.3 = zext i8 %59 to i32 1171 %arrayidx32.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 3 1172 %60 = load i8, i8* %arrayidx32.3, align 1 1173 %conv33.3 = zext i8 %60 to i32 1174 %arrayidx34.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 3 1175 %61 = load i8, i8* %arrayidx34.3, align 1 1176 %conv35.3 = zext i8 %61 to i32 1177 %arrayidx37.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 7 1178 %62 = load i8, i8* %arrayidx37.3, align 1 1179 %conv38.3 = zext i8 %62 to i32 1180 %arrayidx39.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 7 1181 %63 = load i8, i8* %arrayidx39.3, align 1 1182 %conv40.3 = zext i8 %63 to i32 1183 1184 %m1 = mul i32 %conv, %conv4 1185 %m2 = mul i32 %conv9, %conv14 1186 %m3 = mul i32 %conv21, %conv26 1187 %m4 = mul i32 %conv33, %conv38 1188 %m5 = mul i32 %conv2, %conv6 1189 %m6 = mul i32 %conv11, %conv16 1190 %m7 = mul i32 %conv23, %conv28 1191 %m8 = mul i32 %conv35, %conv40 1192 %m9 = mul i32 %conv.1, %conv4.1 1193 %m10 = mul i32 %conv9.1, %conv14.1 1194 %m11 = mul i32 %conv21.1, %conv26.1 1195 %m12 = mul i32 %conv33.1, %conv38.1 1196 %m13 = mul i32 %conv2.1, %conv6.1 1197 %m14 = mul i32 %conv11.1, %conv16.1 1198 %m15 = mul i32 %conv23.1, %conv28.1 1199 %m16 = mul i32 %conv35.1, %conv40.1 1200 1201 %dst1 = getelementptr inbounds i32, i32* %dst0, i64 1 1202 %dst2 = getelementptr inbounds i32, i32* %dst0, i64 2 1203 %dst3 = getelementptr inbounds i32, i32* %dst0, i64 3 1204 %dst4 = getelementptr inbounds i32, i32* %dst0, i64 4 1205 %dst5 = getelementptr inbounds i32, i32* %dst0, i64 5 1206 %dst6 = getelementptr inbounds i32, i32* %dst0, i64 6 1207 %dst7 = getelementptr inbounds i32, i32* %dst0, i64 7 1208 %dst8 = getelementptr inbounds i32, i32* %dst0, i64 8 1209 %dst9 = getelementptr inbounds i32, i32* %dst0, i64 9 1210 %dst10 = getelementptr inbounds i32, i32* %dst0, i64 10 1211 %dst11 = getelementptr inbounds i32, i32* %dst0, i64 11 1212 %dst12 = getelementptr inbounds i32, i32* %dst0, i64 12 1213 %dst13 = getelementptr inbounds i32, i32* %dst0, i64 13 1214 %dst14 = getelementptr inbounds i32, i32* %dst0, i64 14 1215 %dst15 = getelementptr inbounds i32, i32* %dst0, i64 15 1216 store i32 %m1, i32* %dst0 1217 store i32 %m2, i32* %dst1 1218 store i32 %m3, i32* %dst2 1219 store i32 %m4, i32* %dst3 1220 store i32 %m5, i32* %dst4 1221 store i32 %m6, i32* %dst5 1222 store i32 %m7, i32* %dst6 1223 store i32 %m8, i32* %dst7 1224 store i32 %m9, i32* %dst8 1225 store i32 %m10, i32* %dst9 1226 store i32 %m11, i32* %dst10 1227 store i32 %m12, i32* %dst11 1228 store i32 %m13, i32* %dst12 1229 store i32 %m14, i32* %dst13 1230 store i32 %m15, i32* %dst14 1231 store i32 %m16, i32* %dst15 1232 ret void 1233} 1234 1235define dso_local i32 @full(i8* nocapture noundef readonly %p1, i32 noundef %st1, i8* nocapture noundef readonly %p2, i32 noundef %st2) { 1236; CHECK-LABEL: @full( 1237; CHECK-NEXT: entry: 1238; CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[ST1:%.*]] to i64 1239; CHECK-NEXT: [[IDX_EXT63:%.*]] = sext i32 [[ST2:%.*]] to i64 1240; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[P1:%.*]], i64 4 1241; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, i8* [[P2:%.*]], i64 4 1242; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[P1]], i64 [[IDX_EXT]] 1243; CHECK-NEXT: [[ADD_PTR64:%.*]] = getelementptr inbounds i8, i8* [[P2]], i64 [[IDX_EXT63]] 1244; CHECK-NEXT: [[ARRAYIDX3_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR]], i64 4 1245; CHECK-NEXT: [[ARRAYIDX5_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64]], i64 4 1246; CHECK-NEXT: [[ADD_PTR_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR]], i64 [[IDX_EXT]] 1247; CHECK-NEXT: [[ADD_PTR64_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64]], i64 [[IDX_EXT63]] 1248; CHECK-NEXT: [[ARRAYIDX3_2:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR_1]], i64 4 1249; CHECK-NEXT: [[ARRAYIDX5_2:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64_1]], i64 4 1250; CHECK-NEXT: [[ADD_PTR_2:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR_1]], i64 [[IDX_EXT]] 1251; CHECK-NEXT: [[ADD_PTR64_2:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64_1]], i64 [[IDX_EXT63]] 1252; CHECK-NEXT: [[ARRAYIDX3_3:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR_2]], i64 4 1253; CHECK-NEXT: [[ARRAYIDX5_3:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64_2]], i64 4 1254; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[P1]] to <4 x i8>* 1255; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* [[TMP0]], align 1 1256; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[P2]] to <4 x i8>* 1257; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i8>, <4 x i8>* [[TMP2]], align 1 1258; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8* [[ARRAYIDX3]] to <4 x i8>* 1259; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i8>, <4 x i8>* [[TMP4]], align 1 1260; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[ARRAYIDX5]] to <4 x i8>* 1261; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i8>, <4 x i8>* [[TMP6]], align 1 1262; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8* [[ADD_PTR]] to <4 x i8>* 1263; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i8>, <4 x i8>* [[TMP8]], align 1 1264; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8* [[ADD_PTR64]] to <4 x i8>* 1265; CHECK-NEXT: [[TMP11:%.*]] = load <4 x i8>, <4 x i8>* [[TMP10]], align 1 1266; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8* [[ARRAYIDX3_1]] to <4 x i8>* 1267; CHECK-NEXT: [[TMP13:%.*]] = load <4 x i8>, <4 x i8>* [[TMP12]], align 1 1268; CHECK-NEXT: [[TMP14:%.*]] = bitcast i8* [[ARRAYIDX5_1]] to <4 x i8>* 1269; CHECK-NEXT: [[TMP15:%.*]] = load <4 x i8>, <4 x i8>* [[TMP14]], align 1 1270; CHECK-NEXT: [[TMP16:%.*]] = bitcast i8* [[ADD_PTR_1]] to <4 x i8>* 1271; CHECK-NEXT: [[TMP17:%.*]] = load <4 x i8>, <4 x i8>* [[TMP16]], align 1 1272; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8* [[ADD_PTR64_1]] to <4 x i8>* 1273; CHECK-NEXT: [[TMP19:%.*]] = load <4 x i8>, <4 x i8>* [[TMP18]], align 1 1274; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8* [[ARRAYIDX3_2]] to <4 x i8>* 1275; CHECK-NEXT: [[TMP21:%.*]] = load <4 x i8>, <4 x i8>* [[TMP20]], align 1 1276; CHECK-NEXT: [[TMP22:%.*]] = bitcast i8* [[ARRAYIDX5_2]] to <4 x i8>* 1277; CHECK-NEXT: [[TMP23:%.*]] = load <4 x i8>, <4 x i8>* [[TMP22]], align 1 1278; CHECK-NEXT: [[TMP24:%.*]] = bitcast i8* [[ADD_PTR_2]] to <4 x i8>* 1279; CHECK-NEXT: [[TMP25:%.*]] = load <4 x i8>, <4 x i8>* [[TMP24]], align 1 1280; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <4 x i8> [[TMP25]], <4 x i8> [[TMP17]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1281; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1282; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <16 x i8> [[TMP26]], <16 x i8> [[TMP27]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef> 1283; CHECK-NEXT: [[TMP29:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1284; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <16 x i8> [[TMP28]], <16 x i8> [[TMP29]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19> 1285; CHECK-NEXT: [[TMP31:%.*]] = zext <16 x i8> [[TMP30]] to <16 x i32> 1286; CHECK-NEXT: [[TMP32:%.*]] = bitcast i8* [[ADD_PTR64_2]] to <4 x i8>* 1287; CHECK-NEXT: [[TMP33:%.*]] = load <4 x i8>, <4 x i8>* [[TMP32]], align 1 1288; CHECK-NEXT: [[TMP34:%.*]] = shufflevector <4 x i8> [[TMP33]], <4 x i8> [[TMP19]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1289; CHECK-NEXT: [[TMP35:%.*]] = shufflevector <4 x i8> [[TMP11]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1290; CHECK-NEXT: [[TMP36:%.*]] = shufflevector <16 x i8> [[TMP34]], <16 x i8> [[TMP35]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef> 1291; CHECK-NEXT: [[TMP37:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1292; CHECK-NEXT: [[TMP38:%.*]] = shufflevector <16 x i8> [[TMP36]], <16 x i8> [[TMP37]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19> 1293; CHECK-NEXT: [[TMP39:%.*]] = zext <16 x i8> [[TMP38]] to <16 x i32> 1294; CHECK-NEXT: [[TMP40:%.*]] = sub nsw <16 x i32> [[TMP31]], [[TMP39]] 1295; CHECK-NEXT: [[TMP41:%.*]] = bitcast i8* [[ARRAYIDX3_3]] to <4 x i8>* 1296; CHECK-NEXT: [[TMP42:%.*]] = load <4 x i8>, <4 x i8>* [[TMP41]], align 1 1297; CHECK-NEXT: [[TMP43:%.*]] = shufflevector <4 x i8> [[TMP42]], <4 x i8> [[TMP21]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1298; CHECK-NEXT: [[TMP44:%.*]] = shufflevector <4 x i8> [[TMP13]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1299; CHECK-NEXT: [[TMP45:%.*]] = shufflevector <16 x i8> [[TMP43]], <16 x i8> [[TMP44]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef> 1300; CHECK-NEXT: [[TMP46:%.*]] = shufflevector <4 x i8> [[TMP5]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1301; CHECK-NEXT: [[TMP47:%.*]] = shufflevector <16 x i8> [[TMP45]], <16 x i8> [[TMP46]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19> 1302; CHECK-NEXT: [[TMP48:%.*]] = zext <16 x i8> [[TMP47]] to <16 x i32> 1303; CHECK-NEXT: [[TMP49:%.*]] = bitcast i8* [[ARRAYIDX5_3]] to <4 x i8>* 1304; CHECK-NEXT: [[TMP50:%.*]] = load <4 x i8>, <4 x i8>* [[TMP49]], align 1 1305; CHECK-NEXT: [[TMP51:%.*]] = shufflevector <4 x i8> [[TMP50]], <4 x i8> [[TMP23]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1306; CHECK-NEXT: [[TMP52:%.*]] = shufflevector <4 x i8> [[TMP15]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1307; CHECK-NEXT: [[TMP53:%.*]] = shufflevector <16 x i8> [[TMP51]], <16 x i8> [[TMP52]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef> 1308; CHECK-NEXT: [[TMP54:%.*]] = shufflevector <4 x i8> [[TMP7]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> 1309; CHECK-NEXT: [[TMP55:%.*]] = shufflevector <16 x i8> [[TMP53]], <16 x i8> [[TMP54]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19> 1310; CHECK-NEXT: [[TMP56:%.*]] = zext <16 x i8> [[TMP55]] to <16 x i32> 1311; CHECK-NEXT: [[TMP57:%.*]] = sub nsw <16 x i32> [[TMP48]], [[TMP56]] 1312; CHECK-NEXT: [[TMP58:%.*]] = shl nsw <16 x i32> [[TMP57]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> 1313; CHECK-NEXT: [[TMP59:%.*]] = add nsw <16 x i32> [[TMP58]], [[TMP40]] 1314; CHECK-NEXT: [[TMP60:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14> 1315; CHECK-NEXT: [[TMP61:%.*]] = add nsw <16 x i32> [[TMP59]], [[TMP60]] 1316; CHECK-NEXT: [[TMP62:%.*]] = sub nsw <16 x i32> [[TMP59]], [[TMP60]] 1317; CHECK-NEXT: [[TMP63:%.*]] = shufflevector <16 x i32> [[TMP61]], <16 x i32> [[TMP62]], <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 22, i32 18, i32 26, i32 30, i32 5, i32 1, i32 9, i32 13, i32 20, i32 16, i32 24, i32 28> 1318; CHECK-NEXT: [[TMP64:%.*]] = shufflevector <16 x i32> [[TMP63]], <16 x i32> poison, <16 x i32> <i32 9, i32 8, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 1, i32 0, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1319; CHECK-NEXT: [[TMP65:%.*]] = add nsw <16 x i32> [[TMP63]], [[TMP64]] 1320; CHECK-NEXT: [[TMP66:%.*]] = sub nsw <16 x i32> [[TMP63]], [[TMP64]] 1321; CHECK-NEXT: [[TMP67:%.*]] = shufflevector <16 x i32> [[TMP65]], <16 x i32> [[TMP66]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 1322; CHECK-NEXT: [[TMP68:%.*]] = shufflevector <16 x i32> [[TMP67]], <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14> 1323; CHECK-NEXT: [[TMP69:%.*]] = add nsw <16 x i32> [[TMP67]], [[TMP68]] 1324; CHECK-NEXT: [[TMP70:%.*]] = sub nsw <16 x i32> [[TMP67]], [[TMP68]] 1325; CHECK-NEXT: [[TMP71:%.*]] = shufflevector <16 x i32> [[TMP69]], <16 x i32> [[TMP70]], <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 20, i32 5, i32 6, i32 23, i32 24, i32 9, i32 10, i32 27, i32 28, i32 13, i32 14, i32 31> 1326; CHECK-NEXT: [[TMP72:%.*]] = shufflevector <16 x i32> [[TMP71]], <16 x i32> poison, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12> 1327; CHECK-NEXT: [[TMP73:%.*]] = add nsw <16 x i32> [[TMP71]], [[TMP72]] 1328; CHECK-NEXT: [[TMP74:%.*]] = sub nsw <16 x i32> [[TMP71]], [[TMP72]] 1329; CHECK-NEXT: [[TMP75:%.*]] = shufflevector <16 x i32> [[TMP73]], <16 x i32> [[TMP74]], <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31> 1330; CHECK-NEXT: [[TMP76:%.*]] = lshr <16 x i32> [[TMP75]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15> 1331; CHECK-NEXT: [[TMP77:%.*]] = and <16 x i32> [[TMP76]], <i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537> 1332; CHECK-NEXT: [[TMP78:%.*]] = mul nuw <16 x i32> [[TMP77]], <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535> 1333; CHECK-NEXT: [[TMP79:%.*]] = add <16 x i32> [[TMP78]], [[TMP75]] 1334; CHECK-NEXT: [[TMP80:%.*]] = xor <16 x i32> [[TMP79]], [[TMP78]] 1335; CHECK-NEXT: [[TMP81:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP80]]) 1336; CHECK-NEXT: [[CONV118:%.*]] = and i32 [[TMP81]], 65535 1337; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[TMP81]], 16 1338; CHECK-NEXT: [[ADD119:%.*]] = add nuw nsw i32 [[CONV118]], [[SHR]] 1339; CHECK-NEXT: [[SHR120:%.*]] = lshr i32 [[ADD119]], 1 1340; CHECK-NEXT: ret i32 [[SHR120]] 1341; 1342entry: 1343 %idx.ext = sext i32 %st1 to i64 1344 %idx.ext63 = sext i32 %st2 to i64 1345 %0 = load i8, i8* %p1, align 1 1346 %conv = zext i8 %0 to i32 1347 %1 = load i8, i8* %p2, align 1 1348 %conv2 = zext i8 %1 to i32 1349 %sub = sub nsw i32 %conv, %conv2 1350 %arrayidx3 = getelementptr inbounds i8, i8* %p1, i64 4 1351 %2 = load i8, i8* %arrayidx3, align 1 1352 %conv4 = zext i8 %2 to i32 1353 %arrayidx5 = getelementptr inbounds i8, i8* %p2, i64 4 1354 %3 = load i8, i8* %arrayidx5, align 1 1355 %conv6 = zext i8 %3 to i32 1356 %sub7 = sub nsw i32 %conv4, %conv6 1357 %shl = shl nsw i32 %sub7, 16 1358 %add = add nsw i32 %shl, %sub 1359 %arrayidx8 = getelementptr inbounds i8, i8* %p1, i64 1 1360 %4 = load i8, i8* %arrayidx8, align 1 1361 %conv9 = zext i8 %4 to i32 1362 %arrayidx10 = getelementptr inbounds i8, i8* %p2, i64 1 1363 %5 = load i8, i8* %arrayidx10, align 1 1364 %conv11 = zext i8 %5 to i32 1365 %sub12 = sub nsw i32 %conv9, %conv11 1366 %arrayidx13 = getelementptr inbounds i8, i8* %p1, i64 5 1367 %6 = load i8, i8* %arrayidx13, align 1 1368 %conv14 = zext i8 %6 to i32 1369 %arrayidx15 = getelementptr inbounds i8, i8* %p2, i64 5 1370 %7 = load i8, i8* %arrayidx15, align 1 1371 %conv16 = zext i8 %7 to i32 1372 %sub17 = sub nsw i32 %conv14, %conv16 1373 %shl18 = shl nsw i32 %sub17, 16 1374 %add19 = add nsw i32 %shl18, %sub12 1375 %arrayidx20 = getelementptr inbounds i8, i8* %p1, i64 2 1376 %8 = load i8, i8* %arrayidx20, align 1 1377 %conv21 = zext i8 %8 to i32 1378 %arrayidx22 = getelementptr inbounds i8, i8* %p2, i64 2 1379 %9 = load i8, i8* %arrayidx22, align 1 1380 %conv23 = zext i8 %9 to i32 1381 %sub24 = sub nsw i32 %conv21, %conv23 1382 %arrayidx25 = getelementptr inbounds i8, i8* %p1, i64 6 1383 %10 = load i8, i8* %arrayidx25, align 1 1384 %conv26 = zext i8 %10 to i32 1385 %arrayidx27 = getelementptr inbounds i8, i8* %p2, i64 6 1386 %11 = load i8, i8* %arrayidx27, align 1 1387 %conv28 = zext i8 %11 to i32 1388 %sub29 = sub nsw i32 %conv26, %conv28 1389 %shl30 = shl nsw i32 %sub29, 16 1390 %add31 = add nsw i32 %shl30, %sub24 1391 %arrayidx32 = getelementptr inbounds i8, i8* %p1, i64 3 1392 %12 = load i8, i8* %arrayidx32, align 1 1393 %conv33 = zext i8 %12 to i32 1394 %arrayidx34 = getelementptr inbounds i8, i8* %p2, i64 3 1395 %13 = load i8, i8* %arrayidx34, align 1 1396 %conv35 = zext i8 %13 to i32 1397 %sub36 = sub nsw i32 %conv33, %conv35 1398 %arrayidx37 = getelementptr inbounds i8, i8* %p1, i64 7 1399 %14 = load i8, i8* %arrayidx37, align 1 1400 %conv38 = zext i8 %14 to i32 1401 %arrayidx39 = getelementptr inbounds i8, i8* %p2, i64 7 1402 %15 = load i8, i8* %arrayidx39, align 1 1403 %conv40 = zext i8 %15 to i32 1404 %sub41 = sub nsw i32 %conv38, %conv40 1405 %shl42 = shl nsw i32 %sub41, 16 1406 %add43 = add nsw i32 %shl42, %sub36 1407 %add44 = add nsw i32 %add19, %add 1408 %sub45 = sub nsw i32 %add, %add19 1409 %add46 = add nsw i32 %add43, %add31 1410 %sub47 = sub nsw i32 %add31, %add43 1411 %add48 = add nsw i32 %add46, %add44 1412 %sub51 = sub nsw i32 %add44, %add46 1413 %add55 = add nsw i32 %sub47, %sub45 1414 %sub59 = sub nsw i32 %sub45, %sub47 1415 %add.ptr = getelementptr inbounds i8, i8* %p1, i64 %idx.ext 1416 %add.ptr64 = getelementptr inbounds i8, i8* %p2, i64 %idx.ext63 1417 %16 = load i8, i8* %add.ptr, align 1 1418 %conv.1 = zext i8 %16 to i32 1419 %17 = load i8, i8* %add.ptr64, align 1 1420 %conv2.1 = zext i8 %17 to i32 1421 %sub.1 = sub nsw i32 %conv.1, %conv2.1 1422 %arrayidx3.1 = getelementptr inbounds i8, i8* %add.ptr, i64 4 1423 %18 = load i8, i8* %arrayidx3.1, align 1 1424 %conv4.1 = zext i8 %18 to i32 1425 %arrayidx5.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 4 1426 %19 = load i8, i8* %arrayidx5.1, align 1 1427 %conv6.1 = zext i8 %19 to i32 1428 %sub7.1 = sub nsw i32 %conv4.1, %conv6.1 1429 %shl.1 = shl nsw i32 %sub7.1, 16 1430 %add.1 = add nsw i32 %shl.1, %sub.1 1431 %arrayidx8.1 = getelementptr inbounds i8, i8* %add.ptr, i64 1 1432 %20 = load i8, i8* %arrayidx8.1, align 1 1433 %conv9.1 = zext i8 %20 to i32 1434 %arrayidx10.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 1 1435 %21 = load i8, i8* %arrayidx10.1, align 1 1436 %conv11.1 = zext i8 %21 to i32 1437 %sub12.1 = sub nsw i32 %conv9.1, %conv11.1 1438 %arrayidx13.1 = getelementptr inbounds i8, i8* %add.ptr, i64 5 1439 %22 = load i8, i8* %arrayidx13.1, align 1 1440 %conv14.1 = zext i8 %22 to i32 1441 %arrayidx15.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 5 1442 %23 = load i8, i8* %arrayidx15.1, align 1 1443 %conv16.1 = zext i8 %23 to i32 1444 %sub17.1 = sub nsw i32 %conv14.1, %conv16.1 1445 %shl18.1 = shl nsw i32 %sub17.1, 16 1446 %add19.1 = add nsw i32 %shl18.1, %sub12.1 1447 %arrayidx20.1 = getelementptr inbounds i8, i8* %add.ptr, i64 2 1448 %24 = load i8, i8* %arrayidx20.1, align 1 1449 %conv21.1 = zext i8 %24 to i32 1450 %arrayidx22.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 2 1451 %25 = load i8, i8* %arrayidx22.1, align 1 1452 %conv23.1 = zext i8 %25 to i32 1453 %sub24.1 = sub nsw i32 %conv21.1, %conv23.1 1454 %arrayidx25.1 = getelementptr inbounds i8, i8* %add.ptr, i64 6 1455 %26 = load i8, i8* %arrayidx25.1, align 1 1456 %conv26.1 = zext i8 %26 to i32 1457 %arrayidx27.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 6 1458 %27 = load i8, i8* %arrayidx27.1, align 1 1459 %conv28.1 = zext i8 %27 to i32 1460 %sub29.1 = sub nsw i32 %conv26.1, %conv28.1 1461 %shl30.1 = shl nsw i32 %sub29.1, 16 1462 %add31.1 = add nsw i32 %shl30.1, %sub24.1 1463 %arrayidx32.1 = getelementptr inbounds i8, i8* %add.ptr, i64 3 1464 %28 = load i8, i8* %arrayidx32.1, align 1 1465 %conv33.1 = zext i8 %28 to i32 1466 %arrayidx34.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 3 1467 %29 = load i8, i8* %arrayidx34.1, align 1 1468 %conv35.1 = zext i8 %29 to i32 1469 %sub36.1 = sub nsw i32 %conv33.1, %conv35.1 1470 %arrayidx37.1 = getelementptr inbounds i8, i8* %add.ptr, i64 7 1471 %30 = load i8, i8* %arrayidx37.1, align 1 1472 %conv38.1 = zext i8 %30 to i32 1473 %arrayidx39.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 7 1474 %31 = load i8, i8* %arrayidx39.1, align 1 1475 %conv40.1 = zext i8 %31 to i32 1476 %sub41.1 = sub nsw i32 %conv38.1, %conv40.1 1477 %shl42.1 = shl nsw i32 %sub41.1, 16 1478 %add43.1 = add nsw i32 %shl42.1, %sub36.1 1479 %add44.1 = add nsw i32 %add19.1, %add.1 1480 %sub45.1 = sub nsw i32 %add.1, %add19.1 1481 %add46.1 = add nsw i32 %add43.1, %add31.1 1482 %sub47.1 = sub nsw i32 %add31.1, %add43.1 1483 %add48.1 = add nsw i32 %add46.1, %add44.1 1484 %sub51.1 = sub nsw i32 %add44.1, %add46.1 1485 %add55.1 = add nsw i32 %sub47.1, %sub45.1 1486 %sub59.1 = sub nsw i32 %sub45.1, %sub47.1 1487 %add.ptr.1 = getelementptr inbounds i8, i8* %add.ptr, i64 %idx.ext 1488 %add.ptr64.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 %idx.ext63 1489 %32 = load i8, i8* %add.ptr.1, align 1 1490 %conv.2 = zext i8 %32 to i32 1491 %33 = load i8, i8* %add.ptr64.1, align 1 1492 %conv2.2 = zext i8 %33 to i32 1493 %sub.2 = sub nsw i32 %conv.2, %conv2.2 1494 %arrayidx3.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 4 1495 %34 = load i8, i8* %arrayidx3.2, align 1 1496 %conv4.2 = zext i8 %34 to i32 1497 %arrayidx5.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 4 1498 %35 = load i8, i8* %arrayidx5.2, align 1 1499 %conv6.2 = zext i8 %35 to i32 1500 %sub7.2 = sub nsw i32 %conv4.2, %conv6.2 1501 %shl.2 = shl nsw i32 %sub7.2, 16 1502 %add.2 = add nsw i32 %shl.2, %sub.2 1503 %arrayidx8.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 1 1504 %36 = load i8, i8* %arrayidx8.2, align 1 1505 %conv9.2 = zext i8 %36 to i32 1506 %arrayidx10.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 1 1507 %37 = load i8, i8* %arrayidx10.2, align 1 1508 %conv11.2 = zext i8 %37 to i32 1509 %sub12.2 = sub nsw i32 %conv9.2, %conv11.2 1510 %arrayidx13.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 5 1511 %38 = load i8, i8* %arrayidx13.2, align 1 1512 %conv14.2 = zext i8 %38 to i32 1513 %arrayidx15.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 5 1514 %39 = load i8, i8* %arrayidx15.2, align 1 1515 %conv16.2 = zext i8 %39 to i32 1516 %sub17.2 = sub nsw i32 %conv14.2, %conv16.2 1517 %shl18.2 = shl nsw i32 %sub17.2, 16 1518 %add19.2 = add nsw i32 %shl18.2, %sub12.2 1519 %arrayidx20.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 2 1520 %40 = load i8, i8* %arrayidx20.2, align 1 1521 %conv21.2 = zext i8 %40 to i32 1522 %arrayidx22.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 2 1523 %41 = load i8, i8* %arrayidx22.2, align 1 1524 %conv23.2 = zext i8 %41 to i32 1525 %sub24.2 = sub nsw i32 %conv21.2, %conv23.2 1526 %arrayidx25.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 6 1527 %42 = load i8, i8* %arrayidx25.2, align 1 1528 %conv26.2 = zext i8 %42 to i32 1529 %arrayidx27.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 6 1530 %43 = load i8, i8* %arrayidx27.2, align 1 1531 %conv28.2 = zext i8 %43 to i32 1532 %sub29.2 = sub nsw i32 %conv26.2, %conv28.2 1533 %shl30.2 = shl nsw i32 %sub29.2, 16 1534 %add31.2 = add nsw i32 %shl30.2, %sub24.2 1535 %arrayidx32.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 3 1536 %44 = load i8, i8* %arrayidx32.2, align 1 1537 %conv33.2 = zext i8 %44 to i32 1538 %arrayidx34.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 3 1539 %45 = load i8, i8* %arrayidx34.2, align 1 1540 %conv35.2 = zext i8 %45 to i32 1541 %sub36.2 = sub nsw i32 %conv33.2, %conv35.2 1542 %arrayidx37.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 7 1543 %46 = load i8, i8* %arrayidx37.2, align 1 1544 %conv38.2 = zext i8 %46 to i32 1545 %arrayidx39.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 7 1546 %47 = load i8, i8* %arrayidx39.2, align 1 1547 %conv40.2 = zext i8 %47 to i32 1548 %sub41.2 = sub nsw i32 %conv38.2, %conv40.2 1549 %shl42.2 = shl nsw i32 %sub41.2, 16 1550 %add43.2 = add nsw i32 %shl42.2, %sub36.2 1551 %add44.2 = add nsw i32 %add19.2, %add.2 1552 %sub45.2 = sub nsw i32 %add.2, %add19.2 1553 %add46.2 = add nsw i32 %add43.2, %add31.2 1554 %sub47.2 = sub nsw i32 %add31.2, %add43.2 1555 %add48.2 = add nsw i32 %add46.2, %add44.2 1556 %sub51.2 = sub nsw i32 %add44.2, %add46.2 1557 %add55.2 = add nsw i32 %sub47.2, %sub45.2 1558 %sub59.2 = sub nsw i32 %sub45.2, %sub47.2 1559 %add.ptr.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 %idx.ext 1560 %add.ptr64.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 %idx.ext63 1561 %48 = load i8, i8* %add.ptr.2, align 1 1562 %conv.3 = zext i8 %48 to i32 1563 %49 = load i8, i8* %add.ptr64.2, align 1 1564 %conv2.3 = zext i8 %49 to i32 1565 %sub.3 = sub nsw i32 %conv.3, %conv2.3 1566 %arrayidx3.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 4 1567 %50 = load i8, i8* %arrayidx3.3, align 1 1568 %conv4.3 = zext i8 %50 to i32 1569 %arrayidx5.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 4 1570 %51 = load i8, i8* %arrayidx5.3, align 1 1571 %conv6.3 = zext i8 %51 to i32 1572 %sub7.3 = sub nsw i32 %conv4.3, %conv6.3 1573 %shl.3 = shl nsw i32 %sub7.3, 16 1574 %add.3 = add nsw i32 %shl.3, %sub.3 1575 %arrayidx8.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 1 1576 %52 = load i8, i8* %arrayidx8.3, align 1 1577 %conv9.3 = zext i8 %52 to i32 1578 %arrayidx10.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 1 1579 %53 = load i8, i8* %arrayidx10.3, align 1 1580 %conv11.3 = zext i8 %53 to i32 1581 %sub12.3 = sub nsw i32 %conv9.3, %conv11.3 1582 %arrayidx13.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 5 1583 %54 = load i8, i8* %arrayidx13.3, align 1 1584 %conv14.3 = zext i8 %54 to i32 1585 %arrayidx15.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 5 1586 %55 = load i8, i8* %arrayidx15.3, align 1 1587 %conv16.3 = zext i8 %55 to i32 1588 %sub17.3 = sub nsw i32 %conv14.3, %conv16.3 1589 %shl18.3 = shl nsw i32 %sub17.3, 16 1590 %add19.3 = add nsw i32 %shl18.3, %sub12.3 1591 %arrayidx20.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 2 1592 %56 = load i8, i8* %arrayidx20.3, align 1 1593 %conv21.3 = zext i8 %56 to i32 1594 %arrayidx22.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 2 1595 %57 = load i8, i8* %arrayidx22.3, align 1 1596 %conv23.3 = zext i8 %57 to i32 1597 %sub24.3 = sub nsw i32 %conv21.3, %conv23.3 1598 %arrayidx25.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 6 1599 %58 = load i8, i8* %arrayidx25.3, align 1 1600 %conv26.3 = zext i8 %58 to i32 1601 %arrayidx27.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 6 1602 %59 = load i8, i8* %arrayidx27.3, align 1 1603 %conv28.3 = zext i8 %59 to i32 1604 %sub29.3 = sub nsw i32 %conv26.3, %conv28.3 1605 %shl30.3 = shl nsw i32 %sub29.3, 16 1606 %add31.3 = add nsw i32 %shl30.3, %sub24.3 1607 %arrayidx32.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 3 1608 %60 = load i8, i8* %arrayidx32.3, align 1 1609 %conv33.3 = zext i8 %60 to i32 1610 %arrayidx34.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 3 1611 %61 = load i8, i8* %arrayidx34.3, align 1 1612 %conv35.3 = zext i8 %61 to i32 1613 %sub36.3 = sub nsw i32 %conv33.3, %conv35.3 1614 %arrayidx37.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 7 1615 %62 = load i8, i8* %arrayidx37.3, align 1 1616 %conv38.3 = zext i8 %62 to i32 1617 %arrayidx39.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 7 1618 %63 = load i8, i8* %arrayidx39.3, align 1 1619 %conv40.3 = zext i8 %63 to i32 1620 %sub41.3 = sub nsw i32 %conv38.3, %conv40.3 1621 %shl42.3 = shl nsw i32 %sub41.3, 16 1622 %add43.3 = add nsw i32 %shl42.3, %sub36.3 1623 %add44.3 = add nsw i32 %add19.3, %add.3 1624 %sub45.3 = sub nsw i32 %add.3, %add19.3 1625 %add46.3 = add nsw i32 %add43.3, %add31.3 1626 %sub47.3 = sub nsw i32 %add31.3, %add43.3 1627 %add48.3 = add nsw i32 %add46.3, %add44.3 1628 %sub51.3 = sub nsw i32 %add44.3, %add46.3 1629 %add55.3 = add nsw i32 %sub47.3, %sub45.3 1630 %sub59.3 = sub nsw i32 %sub45.3, %sub47.3 1631 %add78 = add nsw i32 %add48.1, %add48 1632 %sub86 = sub nsw i32 %add48, %add48.1 1633 %add94 = add nsw i32 %add48.3, %add48.2 1634 %sub102 = sub nsw i32 %add48.2, %add48.3 1635 %add103 = add nsw i32 %add94, %add78 1636 %sub104 = sub nsw i32 %add78, %add94 1637 %add105 = add nsw i32 %sub102, %sub86 1638 %sub106 = sub nsw i32 %sub86, %sub102 1639 %shr.i = lshr i32 %add103, 15 1640 %and.i = and i32 %shr.i, 65537 1641 %mul.i = mul nuw i32 %and.i, 65535 1642 %add.i = add i32 %mul.i, %add103 1643 %xor.i = xor i32 %add.i, %mul.i 1644 %shr.i184 = lshr i32 %add105, 15 1645 %and.i185 = and i32 %shr.i184, 65537 1646 %mul.i186 = mul nuw i32 %and.i185, 65535 1647 %add.i187 = add i32 %mul.i186, %add105 1648 %xor.i188 = xor i32 %add.i187, %mul.i186 1649 %shr.i189 = lshr i32 %sub104, 15 1650 %and.i190 = and i32 %shr.i189, 65537 1651 %mul.i191 = mul nuw i32 %and.i190, 65535 1652 %add.i192 = add i32 %mul.i191, %sub104 1653 %xor.i193 = xor i32 %add.i192, %mul.i191 1654 %shr.i194 = lshr i32 %sub106, 15 1655 %and.i195 = and i32 %shr.i194, 65537 1656 %mul.i196 = mul nuw i32 %and.i195, 65535 1657 %add.i197 = add i32 %mul.i196, %sub106 1658 %xor.i198 = xor i32 %add.i197, %mul.i196 1659 %add110 = add i32 %xor.i188, %xor.i 1660 %add112 = add i32 %add110, %xor.i193 1661 %add113 = add i32 %add112, %xor.i198 1662 %add78.1 = add nsw i32 %add55.1, %add55 1663 %sub86.1 = sub nsw i32 %add55, %add55.1 1664 %add94.1 = add nsw i32 %add55.3, %add55.2 1665 %sub102.1 = sub nsw i32 %add55.2, %add55.3 1666 %add103.1 = add nsw i32 %add94.1, %add78.1 1667 %sub104.1 = sub nsw i32 %add78.1, %add94.1 1668 %add105.1 = add nsw i32 %sub102.1, %sub86.1 1669 %sub106.1 = sub nsw i32 %sub86.1, %sub102.1 1670 %shr.i.1 = lshr i32 %add103.1, 15 1671 %and.i.1 = and i32 %shr.i.1, 65537 1672 %mul.i.1 = mul nuw i32 %and.i.1, 65535 1673 %add.i.1 = add i32 %mul.i.1, %add103.1 1674 %xor.i.1 = xor i32 %add.i.1, %mul.i.1 1675 %shr.i184.1 = lshr i32 %add105.1, 15 1676 %and.i185.1 = and i32 %shr.i184.1, 65537 1677 %mul.i186.1 = mul nuw i32 %and.i185.1, 65535 1678 %add.i187.1 = add i32 %mul.i186.1, %add105.1 1679 %xor.i188.1 = xor i32 %add.i187.1, %mul.i186.1 1680 %shr.i189.1 = lshr i32 %sub104.1, 15 1681 %and.i190.1 = and i32 %shr.i189.1, 65537 1682 %mul.i191.1 = mul nuw i32 %and.i190.1, 65535 1683 %add.i192.1 = add i32 %mul.i191.1, %sub104.1 1684 %xor.i193.1 = xor i32 %add.i192.1, %mul.i191.1 1685 %shr.i194.1 = lshr i32 %sub106.1, 15 1686 %and.i195.1 = and i32 %shr.i194.1, 65537 1687 %mul.i196.1 = mul nuw i32 %and.i195.1, 65535 1688 %add.i197.1 = add i32 %mul.i196.1, %sub106.1 1689 %xor.i198.1 = xor i32 %add.i197.1, %mul.i196.1 1690 %add108.1 = add i32 %xor.i188.1, %add113 1691 %add110.1 = add i32 %add108.1, %xor.i.1 1692 %add112.1 = add i32 %add110.1, %xor.i193.1 1693 %add113.1 = add i32 %add112.1, %xor.i198.1 1694 %add78.2 = add nsw i32 %sub51.1, %sub51 1695 %sub86.2 = sub nsw i32 %sub51, %sub51.1 1696 %add94.2 = add nsw i32 %sub51.3, %sub51.2 1697 %sub102.2 = sub nsw i32 %sub51.2, %sub51.3 1698 %add103.2 = add nsw i32 %add94.2, %add78.2 1699 %sub104.2 = sub nsw i32 %add78.2, %add94.2 1700 %add105.2 = add nsw i32 %sub102.2, %sub86.2 1701 %sub106.2 = sub nsw i32 %sub86.2, %sub102.2 1702 %shr.i.2 = lshr i32 %add103.2, 15 1703 %and.i.2 = and i32 %shr.i.2, 65537 1704 %mul.i.2 = mul nuw i32 %and.i.2, 65535 1705 %add.i.2 = add i32 %mul.i.2, %add103.2 1706 %xor.i.2 = xor i32 %add.i.2, %mul.i.2 1707 %shr.i184.2 = lshr i32 %add105.2, 15 1708 %and.i185.2 = and i32 %shr.i184.2, 65537 1709 %mul.i186.2 = mul nuw i32 %and.i185.2, 65535 1710 %add.i187.2 = add i32 %mul.i186.2, %add105.2 1711 %xor.i188.2 = xor i32 %add.i187.2, %mul.i186.2 1712 %shr.i189.2 = lshr i32 %sub104.2, 15 1713 %and.i190.2 = and i32 %shr.i189.2, 65537 1714 %mul.i191.2 = mul nuw i32 %and.i190.2, 65535 1715 %add.i192.2 = add i32 %mul.i191.2, %sub104.2 1716 %xor.i193.2 = xor i32 %add.i192.2, %mul.i191.2 1717 %shr.i194.2 = lshr i32 %sub106.2, 15 1718 %and.i195.2 = and i32 %shr.i194.2, 65537 1719 %mul.i196.2 = mul nuw i32 %and.i195.2, 65535 1720 %add.i197.2 = add i32 %mul.i196.2, %sub106.2 1721 %xor.i198.2 = xor i32 %add.i197.2, %mul.i196.2 1722 %add108.2 = add i32 %xor.i188.2, %add113.1 1723 %add110.2 = add i32 %add108.2, %xor.i.2 1724 %add112.2 = add i32 %add110.2, %xor.i193.2 1725 %add113.2 = add i32 %add112.2, %xor.i198.2 1726 %add78.3 = add nsw i32 %sub59.1, %sub59 1727 %sub86.3 = sub nsw i32 %sub59, %sub59.1 1728 %add94.3 = add nsw i32 %sub59.3, %sub59.2 1729 %sub102.3 = sub nsw i32 %sub59.2, %sub59.3 1730 %add103.3 = add nsw i32 %add94.3, %add78.3 1731 %sub104.3 = sub nsw i32 %add78.3, %add94.3 1732 %add105.3 = add nsw i32 %sub102.3, %sub86.3 1733 %sub106.3 = sub nsw i32 %sub86.3, %sub102.3 1734 %shr.i.3 = lshr i32 %add103.3, 15 1735 %and.i.3 = and i32 %shr.i.3, 65537 1736 %mul.i.3 = mul nuw i32 %and.i.3, 65535 1737 %add.i.3 = add i32 %mul.i.3, %add103.3 1738 %xor.i.3 = xor i32 %add.i.3, %mul.i.3 1739 %shr.i184.3 = lshr i32 %add105.3, 15 1740 %and.i185.3 = and i32 %shr.i184.3, 65537 1741 %mul.i186.3 = mul nuw i32 %and.i185.3, 65535 1742 %add.i187.3 = add i32 %mul.i186.3, %add105.3 1743 %xor.i188.3 = xor i32 %add.i187.3, %mul.i186.3 1744 %shr.i189.3 = lshr i32 %sub104.3, 15 1745 %and.i190.3 = and i32 %shr.i189.3, 65537 1746 %mul.i191.3 = mul nuw i32 %and.i190.3, 65535 1747 %add.i192.3 = add i32 %mul.i191.3, %sub104.3 1748 %xor.i193.3 = xor i32 %add.i192.3, %mul.i191.3 1749 %shr.i194.3 = lshr i32 %sub106.3, 15 1750 %and.i195.3 = and i32 %shr.i194.3, 65537 1751 %mul.i196.3 = mul nuw i32 %and.i195.3, 65535 1752 %add.i197.3 = add i32 %mul.i196.3, %sub106.3 1753 %xor.i198.3 = xor i32 %add.i197.3, %mul.i196.3 1754 %add108.3 = add i32 %xor.i188.3, %add113.2 1755 %add110.3 = add i32 %add108.3, %xor.i.3 1756 %add112.3 = add i32 %add110.3, %xor.i193.3 1757 %add113.3 = add i32 %add112.3, %xor.i198.3 1758 %conv118 = and i32 %add113.3, 65535 1759 %shr = lshr i32 %add113.3, 16 1760 %add119 = add nuw nsw i32 %conv118, %shr 1761 %shr120 = lshr i32 %add119, 1 1762 ret i32 %shr120 1763} 1764