; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -slp-vectorizer -instcombine -mtriple=aarch64--linux-gnu < %s | FileCheck %s

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64"

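; Every load below sits at a runtime multiple of %stride, so there is no
; contiguous run of elements for the SLP vectorizer to turn into a vector
; load; as the CHECK lines show, the whole multiply-add reduction stays
; scalar.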
define i16 @reduce_allstrided(i16* nocapture noundef readonly %x, i16* nocapture noundef readonly %y, i32 noundef %stride) {
; CHECK-LABEL: @reduce_allstrided(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i16, i16* [[X:%.*]], align 2
; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[STRIDE:%.*]] to i64
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM]]
; CHECK-NEXT:    [[TMP1:%.*]] = load i16, i16* [[ARRAYIDX1]], align 2
; CHECK-NEXT:    [[MUL2:%.*]] = shl nsw i32 [[STRIDE]], 1
; CHECK-NEXT:    [[IDXPROM3:%.*]] = sext i32 [[MUL2]] to i64
; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM3]]
; CHECK-NEXT:    [[TMP2:%.*]] = load i16, i16* [[ARRAYIDX4]], align 2
; CHECK-NEXT:    [[MUL5:%.*]] = mul nsw i32 [[STRIDE]], 3
; CHECK-NEXT:    [[IDXPROM6:%.*]] = sext i32 [[MUL5]] to i64
; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM6]]
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, i16* [[ARRAYIDX7]], align 2
; CHECK-NEXT:    [[MUL8:%.*]] = shl nsw i32 [[STRIDE]], 2
; CHECK-NEXT:    [[IDXPROM9:%.*]] = sext i32 [[MUL8]] to i64
; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM9]]
; CHECK-NEXT:    [[TMP4:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2
; CHECK-NEXT:    [[MUL11:%.*]] = mul nsw i32 [[STRIDE]], 5
; CHECK-NEXT:    [[IDXPROM12:%.*]] = sext i32 [[MUL11]] to i64
; CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM12]]
; CHECK-NEXT:    [[TMP5:%.*]] = load i16, i16* [[ARRAYIDX13]], align 2
; CHECK-NEXT:    [[MUL14:%.*]] = mul nsw i32 [[STRIDE]], 6
; CHECK-NEXT:    [[IDXPROM15:%.*]] = sext i32 [[MUL14]] to i64
; CHECK-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM15]]
; CHECK-NEXT:    [[TMP6:%.*]] = load i16, i16* [[ARRAYIDX16]], align 2
; CHECK-NEXT:    [[MUL17:%.*]] = mul nsw i32 [[STRIDE]], 7
; CHECK-NEXT:    [[IDXPROM18:%.*]] = sext i32 [[MUL17]] to i64
; CHECK-NEXT:    [[ARRAYIDX19:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM18]]
; CHECK-NEXT:    [[TMP7:%.*]] = load i16, i16* [[ARRAYIDX19]], align 2
; CHECK-NEXT:    [[TMP8:%.*]] = load i16, i16* [[Y:%.*]], align 2
; CHECK-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM]]
; CHECK-NEXT:    [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX23]], align 2
; CHECK-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM3]]
; CHECK-NEXT:    [[TMP10:%.*]] = load i16, i16* [[ARRAYIDX26]], align 2
; CHECK-NEXT:    [[ARRAYIDX29:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM6]]
; CHECK-NEXT:    [[TMP11:%.*]] = load i16, i16* [[ARRAYIDX29]], align 2
; CHECK-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM9]]
; CHECK-NEXT:    [[TMP12:%.*]] = load i16, i16* [[ARRAYIDX32]], align 2
; CHECK-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM12]]
; CHECK-NEXT:    [[TMP13:%.*]] = load i16, i16* [[ARRAYIDX35]], align 2
; CHECK-NEXT:    [[ARRAYIDX38:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM15]]
; CHECK-NEXT:    [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX38]], align 2
; CHECK-NEXT:    [[ARRAYIDX41:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM18]]
; CHECK-NEXT:    [[TMP15:%.*]] = load i16, i16* [[ARRAYIDX41]], align 2
; CHECK-NEXT:    [[MUL43:%.*]] = mul i16 [[TMP8]], [[TMP0]]
; CHECK-NEXT:    [[MUL48:%.*]] = mul i16 [[TMP9]], [[TMP1]]
; CHECK-NEXT:    [[ADD49:%.*]] = add i16 [[MUL48]], [[MUL43]]
; CHECK-NEXT:    [[MUL54:%.*]] = mul i16 [[TMP10]], [[TMP2]]
; CHECK-NEXT:    [[ADD55:%.*]] = add i16 [[ADD49]], [[MUL54]]
; CHECK-NEXT:    [[MUL60:%.*]] = mul i16 [[TMP11]], [[TMP3]]
; CHECK-NEXT:    [[ADD61:%.*]] = add i16 [[ADD55]], [[MUL60]]
; CHECK-NEXT:    [[MUL66:%.*]] = mul i16 [[TMP12]], [[TMP4]]
; CHECK-NEXT:    [[ADD67:%.*]] = add i16 [[ADD61]], [[MUL66]]
; CHECK-NEXT:    [[MUL72:%.*]] = mul i16 [[TMP13]], [[TMP5]]
; CHECK-NEXT:    [[ADD73:%.*]] = add i16 [[ADD67]], [[MUL72]]
; CHECK-NEXT:    [[MUL78:%.*]] = mul i16 [[TMP14]], [[TMP6]]
; CHECK-NEXT:    [[ADD79:%.*]] = add i16 [[ADD73]], [[MUL78]]
; CHECK-NEXT:    [[MUL84:%.*]] = mul i16 [[TMP15]], [[TMP7]]
; CHECK-NEXT:    [[ADD85:%.*]] = add i16 [[ADD79]], [[MUL84]]
; CHECK-NEXT:    ret i16 [[ADD85]]
;
entry:
  %0 = load i16, i16* %x, align 2
  %idxprom = sext i32 %stride to i64
  %arrayidx1 = getelementptr inbounds i16, i16* %x, i64 %idxprom
  %1 = load i16, i16* %arrayidx1, align 2
  %mul2 = shl nsw i32 %stride, 1
  %idxprom3 = sext i32 %mul2 to i64
  %arrayidx4 = getelementptr inbounds i16, i16* %x, i64 %idxprom3
  %2 = load i16, i16* %arrayidx4, align 2
  %mul5 = mul nsw i32 %stride, 3
  %idxprom6 = sext i32 %mul5 to i64
  %arrayidx7 = getelementptr inbounds i16, i16* %x, i64 %idxprom6
  %3 = load i16, i16* %arrayidx7, align 2
  %mul8 = shl nsw i32 %stride, 2
  %idxprom9 = sext i32 %mul8 to i64
  %arrayidx10 = getelementptr inbounds i16, i16* %x, i64 %idxprom9
  %4 = load i16, i16* %arrayidx10, align 2
  %mul11 = mul nsw i32 %stride, 5
  %idxprom12 = sext i32 %mul11 to i64
  %arrayidx13 = getelementptr inbounds i16, i16* %x, i64 %idxprom12
  %5 = load i16, i16* %arrayidx13, align 2
  %mul14 = mul nsw i32 %stride, 6
  %idxprom15 = sext i32 %mul14 to i64
  %arrayidx16 = getelementptr inbounds i16, i16* %x, i64 %idxprom15
  %6 = load i16, i16* %arrayidx16, align 2
  %mul17 = mul nsw i32 %stride, 7
  %idxprom18 = sext i32 %mul17 to i64
  %arrayidx19 = getelementptr inbounds i16, i16* %x, i64 %idxprom18
  %7 = load i16, i16* %arrayidx19, align 2
  %8 = load i16, i16* %y, align 2
  %arrayidx23 = getelementptr inbounds i16, i16* %y, i64 %idxprom
  %9 = load i16, i16* %arrayidx23, align 2
  %arrayidx26 = getelementptr inbounds i16, i16* %y, i64 %idxprom3
  %10 = load i16, i16* %arrayidx26, align 2
  %arrayidx29 = getelementptr inbounds i16, i16* %y, i64 %idxprom6
  %11 = load i16, i16* %arrayidx29, align 2
  %arrayidx32 = getelementptr inbounds i16, i16* %y, i64 %idxprom9
  %12 = load i16, i16* %arrayidx32, align 2
  %arrayidx35 = getelementptr inbounds i16, i16* %y, i64 %idxprom12
  %13 = load i16, i16* %arrayidx35, align 2
  %arrayidx38 = getelementptr inbounds i16, i16* %y, i64 %idxprom15
  %14 = load i16, i16* %arrayidx38, align 2
  %arrayidx41 = getelementptr inbounds i16, i16* %y, i64 %idxprom18
  %15 = load i16, i16* %arrayidx41, align 2
  %mul43 = mul i16 %8, %0
  %mul48 = mul i16 %9, %1
  %add49 = add i16 %mul48, %mul43
  %mul54 = mul i16 %10, %2
  %add55 = add i16 %add49, %mul54
  %mul60 = mul i16 %11, %3
  %add61 = add i16 %add55, %mul60
  %mul66 = mul i16 %12, %4
  %add67 = add i16 %add61, %mul66
  %mul72 = mul i16 %13, %5
  %add73 = add i16 %add67, %mul72
  %mul78 = mul i16 %14, %6
  %add79 = add i16 %add73, %mul78
  %mul84 = mul i16 %15, %7
  %add85 = add i16 %add79, %mul84
  ret i16 %add85
}

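; Pairs of adjacent elements at the start of four rows spaced %stride apart.
; Two-element runs are presumably too narrow to be profitable here; the CHECK
; lines show the reduction is left entirely scalar.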
define i16 @reduce_blockstrided2(i16* nocapture noundef readonly %x, i16* nocapture noundef readonly %y, i32 noundef %stride) {
; CHECK-LABEL: @reduce_blockstrided2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i16, i16* [[X:%.*]], align 2
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 1
; CHECK-NEXT:    [[TMP1:%.*]] = load i16, i16* [[ARRAYIDX1]], align 2
; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[STRIDE:%.*]] to i64
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM]]
; CHECK-NEXT:    [[TMP2:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
; CHECK-NEXT:    [[ADD3:%.*]] = add nsw i32 [[STRIDE]], 1
; CHECK-NEXT:    [[IDXPROM4:%.*]] = sext i32 [[ADD3]] to i64
; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM4]]
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, i16* [[ARRAYIDX5]], align 2
; CHECK-NEXT:    [[MUL:%.*]] = shl nsw i32 [[STRIDE]], 1
; CHECK-NEXT:    [[IDXPROM7:%.*]] = sext i32 [[MUL]] to i64
; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM7]]
; CHECK-NEXT:    [[TMP4:%.*]] = load i16, i16* [[ARRAYIDX8]], align 2
; CHECK-NEXT:    [[ADD10:%.*]] = or i32 [[MUL]], 1
; CHECK-NEXT:    [[IDXPROM11:%.*]] = sext i32 [[ADD10]] to i64
; CHECK-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM11]]
; CHECK-NEXT:    [[TMP5:%.*]] = load i16, i16* [[ARRAYIDX12]], align 2
; CHECK-NEXT:    [[MUL13:%.*]] = mul nsw i32 [[STRIDE]], 3
; CHECK-NEXT:    [[IDXPROM15:%.*]] = sext i32 [[MUL13]] to i64
; CHECK-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM15]]
; CHECK-NEXT:    [[TMP6:%.*]] = load i16, i16* [[ARRAYIDX16]], align 2
; CHECK-NEXT:    [[ADD18:%.*]] = add nsw i32 [[MUL13]], 1
; CHECK-NEXT:    [[IDXPROM19:%.*]] = sext i32 [[ADD18]] to i64
; CHECK-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM19]]
; CHECK-NEXT:    [[TMP7:%.*]] = load i16, i16* [[ARRAYIDX20]], align 2
; CHECK-NEXT:    [[TMP8:%.*]] = load i16, i16* [[Y:%.*]], align 2
; CHECK-NEXT:    [[ARRAYIDX24:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM]]
; CHECK-NEXT:    [[TMP9:%.*]] = load i16, i16* [[ARRAYIDX24]], align 2
; CHECK-NEXT:    [[ARRAYIDX28:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM7]]
; CHECK-NEXT:    [[TMP10:%.*]] = load i16, i16* [[ARRAYIDX28]], align 2
; CHECK-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM15]]
; CHECK-NEXT:    [[TMP11:%.*]] = load i16, i16* [[ARRAYIDX32]], align 2
; CHECK-NEXT:    [[ARRAYIDX33:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 1
; CHECK-NEXT:    [[TMP12:%.*]] = load i16, i16* [[ARRAYIDX33]], align 2
; CHECK-NEXT:    [[ARRAYIDX36:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM4]]
; CHECK-NEXT:    [[TMP13:%.*]] = load i16, i16* [[ARRAYIDX36]], align 2
; CHECK-NEXT:    [[ARRAYIDX40:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM11]]
; CHECK-NEXT:    [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX40]], align 2
; CHECK-NEXT:    [[ARRAYIDX44:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM19]]
; CHECK-NEXT:    [[TMP15:%.*]] = load i16, i16* [[ARRAYIDX44]], align 2
; CHECK-NEXT:    [[MUL46:%.*]] = mul i16 [[TMP8]], [[TMP0]]
; CHECK-NEXT:    [[MUL52:%.*]] = mul i16 [[TMP12]], [[TMP1]]
; CHECK-NEXT:    [[MUL58:%.*]] = mul i16 [[TMP9]], [[TMP2]]
; CHECK-NEXT:    [[MUL64:%.*]] = mul i16 [[TMP13]], [[TMP3]]
; CHECK-NEXT:    [[MUL70:%.*]] = mul i16 [[TMP10]], [[TMP4]]
; CHECK-NEXT:    [[MUL76:%.*]] = mul i16 [[TMP14]], [[TMP5]]
; CHECK-NEXT:    [[MUL82:%.*]] = mul i16 [[TMP11]], [[TMP6]]
; CHECK-NEXT:    [[MUL88:%.*]] = mul i16 [[TMP15]], [[TMP7]]
; CHECK-NEXT:    [[ADD53:%.*]] = add i16 [[MUL58]], [[MUL46]]
; CHECK-NEXT:    [[ADD59:%.*]] = add i16 [[ADD53]], [[MUL70]]
; CHECK-NEXT:    [[ADD65:%.*]] = add i16 [[ADD59]], [[MUL82]]
; CHECK-NEXT:    [[ADD71:%.*]] = add i16 [[ADD65]], [[MUL52]]
; CHECK-NEXT:    [[ADD77:%.*]] = add i16 [[ADD71]], [[MUL64]]
; CHECK-NEXT:    [[ADD83:%.*]] = add i16 [[ADD77]], [[MUL76]]
; CHECK-NEXT:    [[ADD89:%.*]] = add i16 [[ADD83]], [[MUL88]]
; CHECK-NEXT:    ret i16 [[ADD89]]
;
entry:
  %0 = load i16, i16* %x, align 2
  %arrayidx1 = getelementptr inbounds i16, i16* %x, i64 1
  %1 = load i16, i16* %arrayidx1, align 2
  %idxprom = sext i32 %stride to i64
  %arrayidx2 = getelementptr inbounds i16, i16* %x, i64 %idxprom
  %2 = load i16, i16* %arrayidx2, align 2
  %add3 = add nsw i32 %stride, 1
  %idxprom4 = sext i32 %add3 to i64
  %arrayidx5 = getelementptr inbounds i16, i16* %x, i64 %idxprom4
  %3 = load i16, i16* %arrayidx5, align 2
  %mul = shl nsw i32 %stride, 1
  %idxprom7 = sext i32 %mul to i64
  %arrayidx8 = getelementptr inbounds i16, i16* %x, i64 %idxprom7
  %4 = load i16, i16* %arrayidx8, align 2
  %add10 = or i32 %mul, 1
  %idxprom11 = sext i32 %add10 to i64
  %arrayidx12 = getelementptr inbounds i16, i16* %x, i64 %idxprom11
  %5 = load i16, i16* %arrayidx12, align 2
  %mul13 = mul nsw i32 %stride, 3
  %idxprom15 = sext i32 %mul13 to i64
  %arrayidx16 = getelementptr inbounds i16, i16* %x, i64 %idxprom15
  %6 = load i16, i16* %arrayidx16, align 2
  %add18 = add nsw i32 %mul13, 1
  %idxprom19 = sext i32 %add18 to i64
  %arrayidx20 = getelementptr inbounds i16, i16* %x, i64 %idxprom19
  %7 = load i16, i16* %arrayidx20, align 2
  %8 = load i16, i16* %y, align 2
  %arrayidx24 = getelementptr inbounds i16, i16* %y, i64 %idxprom
  %9 = load i16, i16* %arrayidx24, align 2
  %arrayidx28 = getelementptr inbounds i16, i16* %y, i64 %idxprom7
  %10 = load i16, i16* %arrayidx28, align 2
  %arrayidx32 = getelementptr inbounds i16, i16* %y, i64 %idxprom15
  %11 = load i16, i16* %arrayidx32, align 2
  %arrayidx33 = getelementptr inbounds i16, i16* %y, i64 1
  %12 = load i16, i16* %arrayidx33, align 2
  %arrayidx36 = getelementptr inbounds i16, i16* %y, i64 %idxprom4
  %13 = load i16, i16* %arrayidx36, align 2
  %arrayidx40 = getelementptr inbounds i16, i16* %y, i64 %idxprom11
  %14 = load i16, i16* %arrayidx40, align 2
  %arrayidx44 = getelementptr inbounds i16, i16* %y, i64 %idxprom19
  %15 = load i16, i16* %arrayidx44, align 2
  %mul46 = mul i16 %8, %0
  %mul52 = mul i16 %12, %1
  %mul58 = mul i16 %9, %2
  %mul64 = mul i16 %13, %3
  %mul70 = mul i16 %10, %4
  %mul76 = mul i16 %14, %5
  %mul82 = mul i16 %11, %6
  %mul88 = mul i16 %15, %7
  %add53 = add i16 %mul58, %mul46
  %add59 = add i16 %add53, %mul70
  %add65 = add i16 %add59, %mul82
  %add71 = add i16 %add65, %mul52
  %add77 = add i16 %add71, %mul64
  %add83 = add i16 %add77, %mul76
  %add89 = add i16 %add83, %mul88
  ret i16 %add89
}

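; Three contiguous elements per row (the fourth lane is computed but never
; loaded). The odd group size presumably defeats vectorization; the CHECK
; lines confirm everything stays scalar.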
define i16 @reduce_blockstrided3(i16* nocapture noundef readonly %x, i16* nocapture noundef readonly %y, i32 noundef %stride) {
; CHECK-LABEL: @reduce_blockstrided3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[L0:%.*]] = load i16, i16* [[X:%.*]], align 2
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 1
; CHECK-NEXT:    [[L1:%.*]] = load i16, i16* [[ARRAYIDX1]], align 2
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 2
; CHECK-NEXT:    [[L2:%.*]] = load i16, i16* [[ARRAYIDX2]], align 2
; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[STRIDE:%.*]] to i64
; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM]]
; CHECK-NEXT:    [[L4:%.*]] = load i16, i16* [[ARRAYIDX4]], align 2
; CHECK-NEXT:    [[ADD5:%.*]] = add nsw i32 [[STRIDE]], 1
; CHECK-NEXT:    [[IDXPROM6:%.*]] = sext i32 [[ADD5]] to i64
; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM6]]
; CHECK-NEXT:    [[L5:%.*]] = load i16, i16* [[ARRAYIDX7]], align 2
; CHECK-NEXT:    [[ADD8:%.*]] = add nsw i32 [[STRIDE]], 2
; CHECK-NEXT:    [[IDXPROM9:%.*]] = sext i32 [[ADD8]] to i64
; CHECK-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, i16* [[X]], i64 [[IDXPROM9]]
; CHECK-NEXT:    [[L6:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2
; CHECK-NEXT:    [[L8:%.*]] = load i16, i16* [[Y:%.*]], align 2
; CHECK-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 1
; CHECK-NEXT:    [[L9:%.*]] = load i16, i16* [[ARRAYIDX15]], align 2
; CHECK-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 2
; CHECK-NEXT:    [[L10:%.*]] = load i16, i16* [[ARRAYIDX16]], align 2
; CHECK-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM]]
; CHECK-NEXT:    [[L12:%.*]] = load i16, i16* [[ARRAYIDX20]], align 2
; CHECK-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM6]]
; CHECK-NEXT:    [[L13:%.*]] = load i16, i16* [[ARRAYIDX23]], align 2
; CHECK-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds i16, i16* [[Y]], i64 [[IDXPROM9]]
; CHECK-NEXT:    [[L14:%.*]] = load i16, i16* [[ARRAYIDX26]], align 2
; CHECK-NEXT:    [[MUL:%.*]] = mul i16 [[L8]], [[L0]]
; CHECK-NEXT:    [[MUL36:%.*]] = mul i16 [[L9]], [[L1]]
; CHECK-NEXT:    [[ADD37:%.*]] = add i16 [[MUL36]], [[MUL]]
; CHECK-NEXT:    [[MUL48:%.*]] = mul i16 [[L10]], [[L2]]
; CHECK-NEXT:    [[ADD49:%.*]] = add i16 [[ADD37]], [[MUL48]]
; CHECK-NEXT:    [[MUL54:%.*]] = mul i16 [[L13]], [[L5]]
; CHECK-NEXT:    [[ADD55:%.*]] = add i16 [[ADD49]], [[MUL54]]
; CHECK-NEXT:    [[MUL60:%.*]] = mul i16 [[L12]], [[L4]]
; CHECK-NEXT:    [[ADD61:%.*]] = add i16 [[ADD55]], [[MUL60]]
; CHECK-NEXT:    [[MUL72:%.*]] = mul i16 [[L14]], [[L6]]
; CHECK-NEXT:    [[ADD73:%.*]] = add i16 [[ADD61]], [[MUL72]]
; CHECK-NEXT:    ret i16 [[ADD73]]
;
entry:
  %l0 = load i16, i16* %x, align 2
  %arrayidx1 = getelementptr inbounds i16, i16* %x, i64 1
  %l1 = load i16, i16* %arrayidx1, align 2
  %arrayidx2 = getelementptr inbounds i16, i16* %x, i64 2
  %l2 = load i16, i16* %arrayidx2, align 2
  %idxprom = sext i32 %stride to i64
  %arrayidx4 = getelementptr inbounds i16, i16* %x, i64 %idxprom
  %l4 = load i16, i16* %arrayidx4, align 2
  %add5 = add nsw i32 %stride, 1
  %idxprom6 = sext i32 %add5 to i64
  %arrayidx7 = getelementptr inbounds i16, i16* %x, i64 %idxprom6
  %l5 = load i16, i16* %arrayidx7, align 2
  %add8 = add nsw i32 %stride, 2
  %idxprom9 = sext i32 %add8 to i64
  %arrayidx10 = getelementptr inbounds i16, i16* %x, i64 %idxprom9
  %l6 = load i16, i16* %arrayidx10, align 2
  %add11 = add nsw i32 %stride, 3
  %idxprom12 = sext i32 %add11 to i64
  %l8 = load i16, i16* %y, align 2
  %arrayidx15 = getelementptr inbounds i16, i16* %y, i64 1
  %l9 = load i16, i16* %arrayidx15, align 2
  %arrayidx16 = getelementptr inbounds i16, i16* %y, i64 2
  %l10 = load i16, i16* %arrayidx16, align 2
  %arrayidx20 = getelementptr inbounds i16, i16* %y, i64 %idxprom
  %l12 = load i16, i16* %arrayidx20, align 2
  %arrayidx23 = getelementptr inbounds i16, i16* %y, i64 %idxprom6
  %l13 = load i16, i16* %arrayidx23, align 2
  %arrayidx26 = getelementptr inbounds i16, i16* %y, i64 %idxprom9
  %l14 = load i16, i16* %arrayidx26, align 2
  %mul = mul i16 %l8, %l0
  %mul36 = mul i16 %l9, %l1
  %add37 = add i16 %mul36, %mul
  %mul48 = mul i16 %l10, %l2
  %add49 = add i16 %add37, %mul48
  %mul54 = mul i16 %l13, %l5
  %add55 = add i16 %add49, %mul54
  %mul60 = mul i16 %l12, %l4
  %add61 = add i16 %add55, %mul60
  %mul72 = mul i16 %l14, %l6
  %add73 = add i16 %add61, %mul72
  ret i16 %add73
}

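; Four contiguous i16s at %x, %x+stride, %y and %y+stride. This is the first
; profitable shape: the CHECK lines show four <4 x i16> loads, two vector
; multiplies, and a single @llvm.vector.reduce.add.v8i16 over the combined
; products.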
define i16 @reduce_blockstrided4(i16* nocapture noundef readonly %x, i16* nocapture noundef readonly %y, i32 noundef %stride) {
; CHECK-LABEL: @reduce_blockstrided4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[STRIDE:%.*]] to i64
; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i64 [[IDXPROM]]
; CHECK-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, i16* [[Y:%.*]], i64 [[IDXPROM]]
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[X]] to <4 x i16>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16* [[ARRAYIDX4]] to <4 x i16>*
; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[TMP2]], align 2
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[Y]] to <4 x i16>*
; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[TMP4]], align 2
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i16* [[ARRAYIDX20]] to <4 x i16>*
; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[TMP6]], align 2
; CHECK-NEXT:    [[TMP8:%.*]] = mul <4 x i16> [[TMP5]], [[TMP1]]
; CHECK-NEXT:    [[TMP9:%.*]] = mul <4 x i16> [[TMP7]], [[TMP3]]
; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP11:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[TMP10]])
; CHECK-NEXT:    ret i16 [[TMP11]]
;
entry:
  %0 = load i16, i16* %x, align 2
  %arrayidx1 = getelementptr inbounds i16, i16* %x, i64 1
  %1 = load i16, i16* %arrayidx1, align 2
  %arrayidx2 = getelementptr inbounds i16, i16* %x, i64 2
  %2 = load i16, i16* %arrayidx2, align 2
  %arrayidx3 = getelementptr inbounds i16, i16* %x, i64 3
  %3 = load i16, i16* %arrayidx3, align 2
  %idxprom = sext i32 %stride to i64
  %arrayidx4 = getelementptr inbounds i16, i16* %x, i64 %idxprom
  %4 = load i16, i16* %arrayidx4, align 2
  %add5 = add nsw i32 %stride, 1
  %idxprom6 = sext i32 %add5 to i64
  %arrayidx7 = getelementptr inbounds i16, i16* %x, i64 %idxprom6
  %5 = load i16, i16* %arrayidx7, align 2
  %add8 = add nsw i32 %stride, 2
  %idxprom9 = sext i32 %add8 to i64
  %arrayidx10 = getelementptr inbounds i16, i16* %x, i64 %idxprom9
  %6 = load i16, i16* %arrayidx10, align 2
  %add11 = add nsw i32 %stride, 3
  %idxprom12 = sext i32 %add11 to i64
  %arrayidx13 = getelementptr inbounds i16, i16* %x, i64 %idxprom12
  %7 = load i16, i16* %arrayidx13, align 2
  %8 = load i16, i16* %y, align 2
  %arrayidx15 = getelementptr inbounds i16, i16* %y, i64 1
  %9 = load i16, i16* %arrayidx15, align 2
  %arrayidx16 = getelementptr inbounds i16, i16* %y, i64 2
  %10 = load i16, i16* %arrayidx16, align 2
  %arrayidx17 = getelementptr inbounds i16, i16* %y, i64 3
  %11 = load i16, i16* %arrayidx17, align 2
  %arrayidx20 = getelementptr inbounds i16, i16* %y, i64 %idxprom
  %12 = load i16, i16* %arrayidx20, align 2
  %arrayidx23 = getelementptr inbounds i16, i16* %y, i64 %idxprom6
  %13 = load i16, i16* %arrayidx23, align 2
  %arrayidx26 = getelementptr inbounds i16, i16* %y, i64 %idxprom9
  %14 = load i16, i16* %arrayidx26, align 2
  %arrayidx29 = getelementptr inbounds i16, i16* %y, i64 %idxprom12
  %15 = load i16, i16* %arrayidx29, align 2
  %mul = mul i16 %8, %0
  %mul36 = mul i16 %9, %1
  %add37 = add i16 %mul36, %mul
  %mul42 = mul i16 %11, %3
  %add43 = add i16 %add37, %mul42
  %mul48 = mul i16 %10, %2
  %add49 = add i16 %add43, %mul48
  %mul54 = mul i16 %13, %5
  %add55 = add i16 %add49, %mul54
  %mul60 = mul i16 %12, %4
  %add61 = add i16 %add55, %mul60
  %mul66 = mul i16 %15, %7
  %add67 = add i16 %add61, %mul66
  %mul72 = mul i16 %14, %6
  %add73 = add i16 %add67, %mul72
  ret i16 %add73
}

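; A 4x4-style block of i8 loads over the first two rows of %p1 and %p2 (the
; loads for rows 2 and 3 feed no use and are deleted). The CHECK lines show
; the lanes gathered via shufflevectors into one <16 x i8> per side, widened
; to i32, multiplied, and reduced with @llvm.vector.reduce.add.v16i32.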
define i32 @reduce_blockstrided4x4(i8* nocapture noundef readonly %p1, i32 noundef %off1, i8* nocapture noundef readonly %p2, i32 noundef %off2) {
; CHECK-LABEL: @reduce_blockstrided4x4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[OFF1:%.*]] to i64
; CHECK-NEXT:    [[IDX_EXT63:%.*]] = sext i32 [[OFF2:%.*]] to i64
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[P1:%.*]], i64 4
; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, i8* [[P2:%.*]], i64 4
; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[P1]], i64 [[IDX_EXT]]
; CHECK-NEXT:    [[ADD_PTR64:%.*]] = getelementptr inbounds i8, i8* [[P2]], i64 [[IDX_EXT63]]
; CHECK-NEXT:    [[ARRAYIDX3_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR]], i64 4
; CHECK-NEXT:    [[ARRAYIDX5_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64]], i64 4
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[P1]] to <4 x i8>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* [[TMP0]], align 1
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[P2]] to <4 x i8>*
; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i8>, <4 x i8>* [[TMP2]], align 1
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8* [[ARRAYIDX3]] to <4 x i8>*
; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i8>, <4 x i8>* [[TMP4]], align 1
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i8* [[ARRAYIDX5]] to <4 x i8>*
; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x i8>, <4 x i8>* [[TMP6]], align 1
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i8* [[ADD_PTR]] to <4 x i8>*
; CHECK-NEXT:    [[TMP9:%.*]] = load <4 x i8>, <4 x i8>* [[TMP8]], align 1
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i8* [[ADD_PTR64]] to <4 x i8>*
; CHECK-NEXT:    [[TMP11:%.*]] = load <4 x i8>, <4 x i8>* [[TMP10]], align 1
; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <16 x i8> [[TMP12]], <16 x i8> [[TMP13]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <4 x i8> [[TMP11]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <16 x i8> [[TMP14]], <16 x i8> [[TMP15]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; CHECK-NEXT:    [[TMP17:%.*]] = zext <16 x i8> [[TMP16]] to <16 x i32>
; CHECK-NEXT:    [[TMP18:%.*]] = bitcast i8* [[ARRAYIDX3_1]] to <4 x i8>*
; CHECK-NEXT:    [[TMP19:%.*]] = load <4 x i8>, <4 x i8>* [[TMP18]], align 1
; CHECK-NEXT:    [[TMP20:%.*]] = bitcast i8* [[ARRAYIDX5_1]] to <4 x i8>*
; CHECK-NEXT:    [[TMP21:%.*]] = load <4 x i8>, <4 x i8>* [[TMP20]], align 1
; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <4 x i8> [[TMP5]], <4 x i8> [[TMP7]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP23:%.*]] = shufflevector <4 x i8> [[TMP19]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <16 x i8> [[TMP22]], <16 x i8> [[TMP23]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <4 x i8> [[TMP21]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP26:%.*]] = shufflevector <16 x i8> [[TMP24]], <16 x i8> [[TMP25]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; CHECK-NEXT:    [[TMP27:%.*]] = zext <16 x i8> [[TMP26]] to <16 x i32>
; CHECK-NEXT:    [[TMP28:%.*]] = mul nuw nsw <16 x i32> [[TMP17]], [[TMP27]]
; CHECK-NEXT:    [[TMP29:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP28]])
; CHECK-NEXT:    ret i32 [[TMP29]]
;
entry:
  %idx.ext = sext i32 %off1 to i64
  %idx.ext63 = sext i32 %off2 to i64

  %0 = load i8, i8* %p1, align 1
  %conv = zext i8 %0 to i32
  %1 = load i8, i8* %p2, align 1
  %conv2 = zext i8 %1 to i32
  %arrayidx3 = getelementptr inbounds i8, i8* %p1, i64 4
  %2 = load i8, i8* %arrayidx3, align 1
  %conv4 = zext i8 %2 to i32
  %arrayidx5 = getelementptr inbounds i8, i8* %p2, i64 4
  %3 = load i8, i8* %arrayidx5, align 1
  %conv6 = zext i8 %3 to i32
  %arrayidx8 = getelementptr inbounds i8, i8* %p1, i64 1
  %4 = load i8, i8* %arrayidx8, align 1
  %conv9 = zext i8 %4 to i32
  %arrayidx10 = getelementptr inbounds i8, i8* %p2, i64 1
  %5 = load i8, i8* %arrayidx10, align 1
  %conv11 = zext i8 %5 to i32
  %arrayidx13 = getelementptr inbounds i8, i8* %p1, i64 5
  %6 = load i8, i8* %arrayidx13, align 1
  %conv14 = zext i8 %6 to i32
  %arrayidx15 = getelementptr inbounds i8, i8* %p2, i64 5
  %7 = load i8, i8* %arrayidx15, align 1
  %conv16 = zext i8 %7 to i32
  %arrayidx20 = getelementptr inbounds i8, i8* %p1, i64 2
  %8 = load i8, i8* %arrayidx20, align 1
  %conv21 = zext i8 %8 to i32
  %arrayidx22 = getelementptr inbounds i8, i8* %p2, i64 2
  %9 = load i8, i8* %arrayidx22, align 1
  %conv23 = zext i8 %9 to i32
  %arrayidx25 = getelementptr inbounds i8, i8* %p1, i64 6
  %10 = load i8, i8* %arrayidx25, align 1
  %conv26 = zext i8 %10 to i32
  %arrayidx27 = getelementptr inbounds i8, i8* %p2, i64 6
  %11 = load i8, i8* %arrayidx27, align 1
  %conv28 = zext i8 %11 to i32
  %arrayidx32 = getelementptr inbounds i8, i8* %p1, i64 3
  %12 = load i8, i8* %arrayidx32, align 1
  %conv33 = zext i8 %12 to i32
  %arrayidx34 = getelementptr inbounds i8, i8* %p2, i64 3
  %13 = load i8, i8* %arrayidx34, align 1
  %conv35 = zext i8 %13 to i32
  %arrayidx37 = getelementptr inbounds i8, i8* %p1, i64 7
  %14 = load i8, i8* %arrayidx37, align 1
  %conv38 = zext i8 %14 to i32
  %arrayidx39 = getelementptr inbounds i8, i8* %p2, i64 7
  %15 = load i8, i8* %arrayidx39, align 1
  %conv40 = zext i8 %15 to i32
  %add.ptr = getelementptr inbounds i8, i8* %p1, i64 %idx.ext
  %16 = load i8, i8* %add.ptr, align 1
  %conv.1 = zext i8 %16 to i32
  %add.ptr64 = getelementptr inbounds i8, i8* %p2, i64 %idx.ext63
  %17 = load i8, i8* %add.ptr64, align 1
  %conv2.1 = zext i8 %17 to i32
  %arrayidx3.1 = getelementptr inbounds i8, i8* %add.ptr, i64 4
  %18 = load i8, i8* %arrayidx3.1, align 1
  %conv4.1 = zext i8 %18 to i32
  %arrayidx5.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 4
  %19 = load i8, i8* %arrayidx5.1, align 1
  %conv6.1 = zext i8 %19 to i32
  %arrayidx8.1 = getelementptr inbounds i8, i8* %add.ptr, i64 1
  %20 = load i8, i8* %arrayidx8.1, align 1
  %conv9.1 = zext i8 %20 to i32
  %arrayidx10.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 1
  %21 = load i8, i8* %arrayidx10.1, align 1
  %conv11.1 = zext i8 %21 to i32
  %arrayidx13.1 = getelementptr inbounds i8, i8* %add.ptr, i64 5
  %22 = load i8, i8* %arrayidx13.1, align 1
  %conv14.1 = zext i8 %22 to i32
  %arrayidx15.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 5
  %23 = load i8, i8* %arrayidx15.1, align 1
  %conv16.1 = zext i8 %23 to i32
  %arrayidx20.1 = getelementptr inbounds i8, i8* %add.ptr, i64 2
  %24 = load i8, i8* %arrayidx20.1, align 1
  %conv21.1 = zext i8 %24 to i32
  %arrayidx22.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 2
  %25 = load i8, i8* %arrayidx22.1, align 1
  %conv23.1 = zext i8 %25 to i32
  %arrayidx25.1 = getelementptr inbounds i8, i8* %add.ptr, i64 6
  %26 = load i8, i8* %arrayidx25.1, align 1
  %conv26.1 = zext i8 %26 to i32
  %arrayidx27.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 6
  %27 = load i8, i8* %arrayidx27.1, align 1
  %conv28.1 = zext i8 %27 to i32
  %arrayidx32.1 = getelementptr inbounds i8, i8* %add.ptr, i64 3
  %28 = load i8, i8* %arrayidx32.1, align 1
  %conv33.1 = zext i8 %28 to i32
  %arrayidx34.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 3
  %29 = load i8, i8* %arrayidx34.1, align 1
  %conv35.1 = zext i8 %29 to i32
  %arrayidx37.1 = getelementptr inbounds i8, i8* %add.ptr, i64 7
  %30 = load i8, i8* %arrayidx37.1, align 1
  %conv38.1 = zext i8 %30 to i32
  %arrayidx39.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 7
  %31 = load i8, i8* %arrayidx39.1, align 1
  %conv40.1 = zext i8 %31 to i32
  %add.ptr.1 = getelementptr inbounds i8, i8* %add.ptr, i64 %idx.ext
  %32 = load i8, i8* %add.ptr.1, align 1
  %conv.2 = zext i8 %32 to i32
  %add.ptr64.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 %idx.ext63
  %33 = load i8, i8* %add.ptr64.1, align 1
  %conv2.2 = zext i8 %33 to i32
  %arrayidx3.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 4
  %34 = load i8, i8* %arrayidx3.2, align 1
  %conv4.2 = zext i8 %34 to i32
  %arrayidx5.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 4
  %35 = load i8, i8* %arrayidx5.2, align 1
  %conv6.2 = zext i8 %35 to i32
  %arrayidx8.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 1
  %36 = load i8, i8* %arrayidx8.2, align 1
  %conv9.2 = zext i8 %36 to i32
  %arrayidx10.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 1
  %37 = load i8, i8* %arrayidx10.2, align 1
  %conv11.2 = zext i8 %37 to i32
  %arrayidx13.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 5
  %38 = load i8, i8* %arrayidx13.2, align 1
  %conv14.2 = zext i8 %38 to i32
  %arrayidx15.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 5
  %39 = load i8, i8* %arrayidx15.2, align 1
  %conv16.2 = zext i8 %39 to i32
  %arrayidx20.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 2
  %40 = load i8, i8* %arrayidx20.2, align 1
  %conv21.2 = zext i8 %40 to i32
  %arrayidx22.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 2
  %41 = load i8, i8* %arrayidx22.2, align 1
  %conv23.2 = zext i8 %41 to i32
  %arrayidx25.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 6
  %42 = load i8, i8* %arrayidx25.2, align 1
  %conv26.2 = zext i8 %42 to i32
  %arrayidx27.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 6
  %43 = load i8, i8* %arrayidx27.2, align 1
  %conv28.2 = zext i8 %43 to i32
  %arrayidx32.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 3
  %44 = load i8, i8* %arrayidx32.2, align 1
  %conv33.2 = zext i8 %44 to i32
  %arrayidx34.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 3
  %45 = load i8, i8* %arrayidx34.2, align 1
  %conv35.2 = zext i8 %45 to i32
  %arrayidx37.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 7
  %46 = load i8, i8* %arrayidx37.2, align 1
  %conv38.2 = zext i8 %46 to i32
  %arrayidx39.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 7
  %47 = load i8, i8* %arrayidx39.2, align 1
  %conv40.2 = zext i8 %47 to i32
  %add.ptr.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 %idx.ext
  %48 = load i8, i8* %add.ptr.2, align 1
  %conv.3 = zext i8 %48 to i32
  %add.ptr64.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 %idx.ext63
  %49 = load i8, i8* %add.ptr64.2, align 1
  %conv2.3 = zext i8 %49 to i32
  %arrayidx3.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 4
  %50 = load i8, i8* %arrayidx3.3, align 1
  %conv4.3 = zext i8 %50 to i32
  %arrayidx5.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 4
  %51 = load i8, i8* %arrayidx5.3, align 1
  %conv6.3 = zext i8 %51 to i32
  %arrayidx8.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 1
  %52 = load i8, i8* %arrayidx8.3, align 1
  %conv9.3 = zext i8 %52 to i32
  %arrayidx10.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 1
  %53 = load i8, i8* %arrayidx10.3, align 1
  %conv11.3 = zext i8 %53 to i32
  %arrayidx13.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 5
  %54 = load i8, i8* %arrayidx13.3, align 1
  %conv14.3 = zext i8 %54 to i32
  %arrayidx15.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 5
  %55 = load i8, i8* %arrayidx15.3, align 1
  %conv16.3 = zext i8 %55 to i32
  %arrayidx20.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 2
  %56 = load i8, i8* %arrayidx20.3, align 1
  %conv21.3 = zext i8 %56 to i32
  %arrayidx22.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 2
  %57 = load i8, i8* %arrayidx22.3, align 1
  %conv23.3 = zext i8 %57 to i32
  %arrayidx25.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 6
  %58 = load i8, i8* %arrayidx25.3, align 1
  %conv26.3 = zext i8 %58 to i32
  %arrayidx27.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 6
  %59 = load i8, i8* %arrayidx27.3, align 1
  %conv28.3 = zext i8 %59 to i32
  %arrayidx32.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 3
  %60 = load i8, i8* %arrayidx32.3, align 1
  %conv33.3 = zext i8 %60 to i32
  %arrayidx34.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 3
  %61 = load i8, i8* %arrayidx34.3, align 1
  %conv35.3 = zext i8 %61 to i32
  %arrayidx37.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 7
  %62 = load i8, i8* %arrayidx37.3, align 1
  %conv38.3 = zext i8 %62 to i32
  %arrayidx39.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 7
  %63 = load i8, i8* %arrayidx39.3, align 1
  %conv40.3 = zext i8 %63 to i32

  %m1 = mul i32 %conv, %conv4
  %m2 = mul i32 %conv9, %conv14
  %m3 = mul i32 %conv21, %conv26
  %m4 = mul i32 %conv33, %conv38
  %m8 = mul i32 %conv2, %conv6
  %m7 = mul i32 %conv11, %conv16
  %m6 = mul i32 %conv23, %conv28
  %m5 = mul i32 %conv35, %conv40
  %m9 = mul i32 %conv.1, %conv4.1
  %m10 = mul i32 %conv9.1, %conv14.1
  %m11 = mul i32 %conv21.1, %conv26.1
  %m12 = mul i32 %conv33.1, %conv38.1
  %m16 = mul i32 %conv2.1, %conv6.1
  %m15 = mul i32 %conv11.1, %conv16.1
  %m14 = mul i32 %conv23.1, %conv28.1
  %m13 = mul i32 %conv35.1, %conv40.1

  %a2 = add i32 %m1, %m2
  %a3 = add i32 %a2, %m3
  %a4 = add i32 %a3, %m4
  %a5 = add i32 %a4, %m5
  %a6 = add i32 %a5, %m6
  %a7 = add i32 %a6, %m7
  %a8 = add i32 %a7, %m8
  %a9 = add i32 %a8, %m9
  %a10 = add i32 %a9, %m10
  %a11 = add i32 %a10, %m11
  %a12 = add i32 %a11, %m12
  %a13 = add i32 %a12, %m13
  %a14 = add i32 %a13, %m14
  %a15 = add i32 %a14, %m15
  %a16 = add i32 %a15, %m16
  ret i32 %a16
}

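; Rows of three i32 products stored to scattered slots of %z. The CHECK lines
; show a mixed outcome: pairs of <2 x i32> multiplies are combined into two
; shuffled <4 x i32> stores while the leftover lanes stay scalar.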
define void @store_blockstrided3(i32* nocapture noundef readonly %x, i32* nocapture noundef readonly %y, i32* nocapture noundef writeonly %z, i32 noundef %stride) {
; CHECK-LABEL: @store_blockstrided3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 2
; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4
; CHECK-NEXT:    [[ADD4:%.*]] = add nsw i32 [[STRIDE:%.*]], 1
; CHECK-NEXT:    [[IDXPROM5:%.*]] = sext i32 [[ADD4]] to i64
; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[IDXPROM5]]
; CHECK-NEXT:    [[MUL:%.*]] = shl nsw i32 [[STRIDE]], 1
; CHECK-NEXT:    [[IDXPROM11:%.*]] = sext i32 [[MUL]] to i64
; CHECK-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[IDXPROM11]]
; CHECK-NEXT:    [[ADD18:%.*]] = add nsw i32 [[MUL]], 2
; CHECK-NEXT:    [[IDXPROM19:%.*]] = sext i32 [[ADD18]] to i64
; CHECK-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[IDXPROM19]]
; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX20]], align 4
; CHECK-NEXT:    [[MUL21:%.*]] = mul nsw i32 [[STRIDE]], 3
; CHECK-NEXT:    [[IDXPROM23:%.*]] = sext i32 [[MUL21]] to i64
; CHECK-NEXT:    [[ARRAYIDX24:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[IDXPROM23]]
; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX24]], align 4
; CHECK-NEXT:    [[ADD26:%.*]] = add nsw i32 [[MUL21]], 1
; CHECK-NEXT:    [[IDXPROM27:%.*]] = sext i32 [[ADD26]] to i64
; CHECK-NEXT:    [[ARRAYIDX28:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[IDXPROM27]]
; CHECK-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr inbounds i32, i32* [[Y:%.*]], i64 2
; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX35]], align 4
; CHECK-NEXT:    [[ARRAYIDX41:%.*]] = getelementptr inbounds i32, i32* [[Y]], i64 [[IDXPROM5]]
; CHECK-NEXT:    [[ARRAYIDX48:%.*]] = getelementptr inbounds i32, i32* [[Y]], i64 [[IDXPROM11]]
; CHECK-NEXT:    [[ARRAYIDX56:%.*]] = getelementptr inbounds i32, i32* [[Y]], i64 [[IDXPROM19]]
; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX56]], align 4
; CHECK-NEXT:    [[ARRAYIDX60:%.*]] = getelementptr inbounds i32, i32* [[Y]], i64 [[IDXPROM23]]
; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX60]], align 4
; CHECK-NEXT:    [[ARRAYIDX64:%.*]] = getelementptr inbounds i32, i32* [[Y]], i64 [[IDXPROM27]]
; CHECK-NEXT:    [[ARRAYIDX72:%.*]] = getelementptr inbounds i32, i32* [[Z:%.*]], i64 1
; CHECK-NEXT:    [[MUL73:%.*]] = mul nsw i32 [[TMP3]], [[TMP0]]
; CHECK-NEXT:    [[ARRAYIDX76:%.*]] = getelementptr inbounds i32, i32* [[Z]], i64 6
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[X]] to <2 x i32>*
; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[TMP6]], align 4
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i32* [[ARRAYIDX6]] to <2 x i32>*
; CHECK-NEXT:    [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[TMP8]], align 4
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i32* [[Y]] to <2 x i32>*
; CHECK-NEXT:    [[TMP11:%.*]] = load <2 x i32>, <2 x i32>* [[TMP10]], align 4
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i32* [[ARRAYIDX41]] to <2 x i32>*
; CHECK-NEXT:    [[TMP13:%.*]] = load <2 x i32>, <2 x i32>* [[TMP12]], align 4
; CHECK-NEXT:    [[TMP14:%.*]] = mul nsw <2 x i32> [[TMP11]], [[TMP7]]
; CHECK-NEXT:    [[TMP15:%.*]] = mul nsw <2 x i32> [[TMP13]], [[TMP9]]
; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i32* [[ARRAYIDX72]] to <4 x i32>*
; CHECK-NEXT:    [[ARRAYIDX84:%.*]] = getelementptr inbounds i32, i32* [[Z]], i64 7
; CHECK-NEXT:    [[MUL85:%.*]] = mul nsw i32 [[TMP4]], [[TMP1]]
; CHECK-NEXT:    [[MUL87:%.*]] = mul nsw i32 [[TMP5]], [[TMP2]]
; CHECK-NEXT:    [[ARRAYIDX88:%.*]] = getelementptr inbounds i32, i32* [[Z]], i64 11
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i32* [[ARRAYIDX12]] to <2 x i32>*
; CHECK-NEXT:    [[TMP18:%.*]] = load <2 x i32>, <2 x i32>* [[TMP17]], align 4
; CHECK-NEXT:    [[TMP19:%.*]] = bitcast i32* [[ARRAYIDX28]] to <2 x i32>*
; CHECK-NEXT:    [[TMP20:%.*]] = load <2 x i32>, <2 x i32>* [[TMP19]], align 4
; CHECK-NEXT:    [[TMP21:%.*]] = bitcast i32* [[ARRAYIDX48]] to <2 x i32>*
; CHECK-NEXT:    [[TMP22:%.*]] = load <2 x i32>, <2 x i32>* [[TMP21]], align 4
; CHECK-NEXT:    [[TMP23:%.*]] = bitcast i32* [[ARRAYIDX64]] to <2 x i32>*
; CHECK-NEXT:    [[TMP24:%.*]] = load <2 x i32>, <2 x i32>* [[TMP23]], align 4
; CHECK-NEXT:    store i32 [[MUL73]], i32* [[Z]], align 4
; CHECK-NEXT:    store <4 x i32> [[SHUFFLE]], <4 x i32>* [[TMP16]], align 4
; CHECK-NEXT:    store i32 [[MUL85]], i32* [[ARRAYIDX76]], align 4
; CHECK-NEXT:    store i32 [[MUL87]], i32* [[ARRAYIDX88]], align 4
; CHECK-NEXT:    [[TMP25:%.*]] = mul nsw <2 x i32> [[TMP22]], [[TMP18]]
; CHECK-NEXT:    [[TMP26:%.*]] = mul nsw <2 x i32> [[TMP24]], [[TMP20]]
; CHECK-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <2 x i32> [[TMP25]], <2 x i32> [[TMP26]], <4 x i32> <i32 1, i32 0, i32 3, i32 2>
; CHECK-NEXT:    [[TMP27:%.*]] = bitcast i32* [[ARRAYIDX84]] to <4 x i32>*
; CHECK-NEXT:    store <4 x i32> [[SHUFFLE1]], <4 x i32>* [[TMP27]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %0 = load i32, i32* %x, align 4
  %arrayidx1 = getelementptr inbounds i32, i32* %x, i64 1
  %1 = load i32, i32* %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds i32, i32* %x, i64 2
  %2 = load i32, i32* %arrayidx2, align 4
  %add4 = add nsw i32 %stride, 1
  %idxprom5 = sext i32 %add4 to i64
  %arrayidx6 = getelementptr inbounds i32, i32* %x, i64 %idxprom5
  %3 = load i32, i32* %arrayidx6, align 4
  %add7 = add nsw i32 %stride, 2
  %idxprom8 = sext i32 %add7 to i64
  %arrayidx9 = getelementptr inbounds i32, i32* %x, i64 %idxprom8
  %4 = load i32, i32* %arrayidx9, align 4
  %mul = shl nsw i32 %stride, 1
  %idxprom11 = sext i32 %mul to i64
  %arrayidx12 = getelementptr inbounds i32, i32* %x, i64 %idxprom11
  %5 = load i32, i32* %arrayidx12, align 4
  %add14 = or i32 %mul, 1
  %idxprom15 = sext i32 %add14 to i64
  %arrayidx16 = getelementptr inbounds i32, i32* %x, i64 %idxprom15
  %6 = load i32, i32* %arrayidx16, align 4
  %add18 = add nsw i32 %mul, 2
  %idxprom19 = sext i32 %add18 to i64
  %arrayidx20 = getelementptr inbounds i32, i32* %x, i64 %idxprom19
  %7 = load i32, i32* %arrayidx20, align 4
  %mul21 = mul nsw i32 %stride, 3
  %idxprom23 = sext i32 %mul21 to i64
  %arrayidx24 = getelementptr inbounds i32, i32* %x, i64 %idxprom23
  %8 = load i32, i32* %arrayidx24, align 4
  %add26 = add nsw i32 %mul21, 1
  %idxprom27 = sext i32 %add26 to i64
  %arrayidx28 = getelementptr inbounds i32, i32* %x, i64 %idxprom27
  %9 = load i32, i32* %arrayidx28, align 4
  %add30 = add nsw i32 %mul21, 2
  %idxprom31 = sext i32 %add30 to i64
  %arrayidx32 = getelementptr inbounds i32, i32* %x, i64 %idxprom31
  %10 = load i32, i32* %arrayidx32, align 4
  %11 = load i32, i32* %y, align 4
  %arrayidx34 = getelementptr inbounds i32, i32* %y, i64 1
  %12 = load i32, i32* %arrayidx34, align 4
  %arrayidx35 = getelementptr inbounds i32, i32* %y, i64 2
  %13 = load i32, i32* %arrayidx35, align 4
  %arrayidx41 = getelementptr inbounds i32, i32* %y, i64 %idxprom5
  %14 = load i32, i32* %arrayidx41, align 4
  %arrayidx44 = getelementptr inbounds i32, i32* %y, i64 %idxprom8
  %15 = load i32, i32* %arrayidx44, align 4
  %arrayidx48 = getelementptr inbounds i32, i32* %y, i64 %idxprom11
  %16 = load i32, i32* %arrayidx48, align 4
  %arrayidx52 = getelementptr inbounds i32, i32* %y, i64 %idxprom15
  %17 = load i32, i32* %arrayidx52, align 4
  %arrayidx56 = getelementptr inbounds i32, i32* %y, i64 %idxprom19
  %18 = load i32, i32* %arrayidx56, align 4
  %arrayidx60 = getelementptr inbounds i32, i32* %y, i64 %idxprom23
  %19 = load i32, i32* %arrayidx60, align 4
  %arrayidx64 = getelementptr inbounds i32, i32* %y, i64 %idxprom27
  %20 = load i32, i32* %arrayidx64, align 4
  %arrayidx68 = getelementptr inbounds i32, i32* %y, i64 %idxprom31
  %21 = load i32, i32* %arrayidx68, align 4
  %mul69 = mul nsw i32 %11, %0
  %arrayidx70 = getelementptr inbounds i32, i32* %z, i64 2
  store i32 %mul69, i32* %arrayidx70, align 4
  %mul71 = mul nsw i32 %12, %1
  %arrayidx72 = getelementptr inbounds i32, i32* %z, i64 1
  store i32 %mul71, i32* %arrayidx72, align 4
  %mul73 = mul nsw i32 %13, %2
  store i32 %mul73, i32* %z, align 4
  %arrayidx76 = getelementptr inbounds i32, i32* %z, i64 6
  %mul77 = mul nsw i32 %14, %3
  %arrayidx78 = getelementptr inbounds i32, i32* %z, i64 4
  store i32 %mul77, i32* %arrayidx78, align 4
  %mul79 = mul nsw i32 %15, %4
  %arrayidx80 = getelementptr inbounds i32, i32* %z, i64 3
  store i32 %mul79, i32* %arrayidx80, align 4
  %mul81 = mul nsw i32 %16, %5
  %arrayidx82 = getelementptr inbounds i32, i32* %z, i64 8
  store i32 %mul81, i32* %arrayidx82, align 4
  %mul83 = mul nsw i32 %17, %6
  %arrayidx84 = getelementptr inbounds i32, i32* %z, i64 7
  store i32 %mul83, i32* %arrayidx84, align 4
  %mul85 = mul nsw i32 %18, %7
  store i32 %mul85, i32* %arrayidx76, align 4
  %mul87 = mul nsw i32 %19, %8
  %arrayidx88 = getelementptr inbounds i32, i32* %z, i64 11
  store i32 %mul87, i32* %arrayidx88, align 4
  %mul89 = mul nsw i32 %20, %9
  %arrayidx90 = getelementptr inbounds i32, i32* %z, i64 10
  store i32 %mul89, i32* %arrayidx90, align 4
  %mul91 = mul nsw i32 %21, %10
  %arrayidx92 = getelementptr inbounds i32, i32* %z, i64 9
  store i32 %mul91, i32* %arrayidx92, align 4
  ret void
}

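; Same block shape as @reduce_blockstrided4, but the eight products are stored
; to consecutive slots of %dst0; the CHECK lines show the whole body folding
; into two <4 x i16> multiplies and a single shuffled <8 x i16> store.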
define void @store_blockstrided4(i16* nocapture noundef readonly %x, i16* nocapture noundef readonly %y, i32 noundef %stride, i16 *%dst0) {
; CHECK-LABEL: @store_blockstrided4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[STRIDE:%.*]] to i64
; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i64 [[IDXPROM]]
; CHECK-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds i16, i16* [[Y:%.*]], i64 [[IDXPROM]]
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[X]] to <4 x i16>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 2
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16* [[ARRAYIDX4]] to <4 x i16>*
; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i16>, <4 x i16>* [[TMP2]], align 2
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[Y]] to <4 x i16>*
; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[TMP4]], align 2
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i16* [[ARRAYIDX20]] to <4 x i16>*
; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[TMP6]], align 2
; CHECK-NEXT:    [[TMP8:%.*]] = mul <4 x i16> [[TMP5]], [[TMP1]]
; CHECK-NEXT:    [[TMP9:%.*]] = mul <4 x i16> [[TMP7]], [[TMP3]]
; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <8 x i32> <i32 0, i32 1, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16* [[DST0:%.*]] to <8 x i16>*
; CHECK-NEXT:    store <8 x i16> [[SHUFFLE]], <8 x i16>* [[TMP10]], align 2
; CHECK-NEXT:    ret void
;
entry:
  %0 = load i16, i16* %x, align 2
  %arrayidx1 = getelementptr inbounds i16, i16* %x, i64 1
  %1 = load i16, i16* %arrayidx1, align 2
  %arrayidx2 = getelementptr inbounds i16, i16* %x, i64 2
  %2 = load i16, i16* %arrayidx2, align 2
  %arrayidx3 = getelementptr inbounds i16, i16* %x, i64 3
  %3 = load i16, i16* %arrayidx3, align 2
  %idxprom = sext i32 %stride to i64
  %arrayidx4 = getelementptr inbounds i16, i16* %x, i64 %idxprom
  %4 = load i16, i16* %arrayidx4, align 2
  %add5 = add nsw i32 %stride, 1
  %idxprom6 = sext i32 %add5 to i64
  %arrayidx7 = getelementptr inbounds i16, i16* %x, i64 %idxprom6
  %5 = load i16, i16* %arrayidx7, align 2
  %add8 = add nsw i32 %stride, 2
  %idxprom9 = sext i32 %add8 to i64
  %arrayidx10 = getelementptr inbounds i16, i16* %x, i64 %idxprom9
  %6 = load i16, i16* %arrayidx10, align 2
  %add11 = add nsw i32 %stride, 3
  %idxprom12 = sext i32 %add11 to i64
  %arrayidx13 = getelementptr inbounds i16, i16* %x, i64 %idxprom12
  %7 = load i16, i16* %arrayidx13, align 2
  %8 = load i16, i16* %y, align 2
  %arrayidx15 = getelementptr inbounds i16, i16* %y, i64 1
  %9 = load i16, i16* %arrayidx15, align 2
  %arrayidx16 = getelementptr inbounds i16, i16* %y, i64 2
  %10 = load i16, i16* %arrayidx16, align 2
  %arrayidx17 = getelementptr inbounds i16, i16* %y, i64 3
  %11 = load i16, i16* %arrayidx17, align 2
  %arrayidx20 = getelementptr inbounds i16, i16* %y, i64 %idxprom
  %12 = load i16, i16* %arrayidx20, align 2
  %arrayidx23 = getelementptr inbounds i16, i16* %y, i64 %idxprom6
  %13 = load i16, i16* %arrayidx23, align 2
  %arrayidx26 = getelementptr inbounds i16, i16* %y, i64 %idxprom9
  %14 = load i16, i16* %arrayidx26, align 2
  %arrayidx29 = getelementptr inbounds i16, i16* %y, i64 %idxprom12
  %15 = load i16, i16* %arrayidx29, align 2
  %mul = mul i16 %8, %0
  %mul36 = mul i16 %9, %1
  %mul42 = mul i16 %11, %3
  %mul48 = mul i16 %10, %2
  %mul54 = mul i16 %13, %5
  %mul60 = mul i16 %12, %4
  %mul66 = mul i16 %15, %7
  %mul72 = mul i16 %14, %6
  %dst1 = getelementptr inbounds i16, i16* %dst0, i64 1
  %dst2 = getelementptr inbounds i16, i16* %dst0, i64 2
  %dst3 = getelementptr inbounds i16, i16* %dst0, i64 3
  %dst4 = getelementptr inbounds i16, i16* %dst0, i64 4
  %dst5 = getelementptr inbounds i16, i16* %dst0, i64 5
  %dst6 = getelementptr inbounds i16, i16* %dst0, i64 6
  %dst7 = getelementptr inbounds i16, i16* %dst0, i64 7
  store i16 %mul, i16* %dst0
  store i16 %mul36, i16* %dst1
  store i16 %mul42, i16* %dst2
  store i16 %mul48, i16* %dst3
  store i16 %mul54, i16* %dst4
  store i16 %mul60, i16* %dst5
  store i16 %mul66, i16* %dst6
  store i16 %mul72, i16* %dst7
  ret void
}

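; The 4x4 i8 block again, with each row's four i32 products stored to a
; separate quarter of %dst0. The CHECK lines show four independent
; <4 x i8> load / zext / mul chains, each ending in a <4 x i32> store.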
define void @store_blockstrided4x4(i8* nocapture noundef readonly %p1, i32 noundef %off1, i8* nocapture noundef readonly %p2, i32 noundef %off2, i32* %dst0) {
; CHECK-LABEL: @store_blockstrided4x4(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[OFF1:%.*]] to i64
; CHECK-NEXT:    [[IDX_EXT63:%.*]] = sext i32 [[OFF2:%.*]] to i64
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[P1:%.*]], i64 4
; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, i8* [[P2:%.*]], i64 4
; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[P1]], i64 [[IDX_EXT]]
; CHECK-NEXT:    [[ADD_PTR64:%.*]] = getelementptr inbounds i8, i8* [[P2]], i64 [[IDX_EXT63]]
; CHECK-NEXT:    [[ARRAYIDX3_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR]], i64 4
; CHECK-NEXT:    [[ARRAYIDX5_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64]], i64 4
; CHECK-NEXT:    [[DST4:%.*]] = getelementptr inbounds i32, i32* [[DST0:%.*]], i64 4
; CHECK-NEXT:    [[DST8:%.*]] = getelementptr inbounds i32, i32* [[DST0]], i64 8
; CHECK-NEXT:    [[DST12:%.*]] = getelementptr inbounds i32, i32* [[DST0]], i64 12
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[P1]] to <4 x i8>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* [[TMP0]], align 1
; CHECK-NEXT:    [[TMP2:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i32>
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i8* [[ARRAYIDX3]] to <4 x i8>*
; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i8>, <4 x i8>* [[TMP3]], align 1
; CHECK-NEXT:    [[TMP5:%.*]] = zext <4 x i8> [[TMP4]] to <4 x i32>
; CHECK-NEXT:    [[TMP6:%.*]] = mul nuw nsw <4 x i32> [[TMP2]], [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[DST0]] to <4 x i32>*
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i8* [[P2]] to <4 x i8>*
; CHECK-NEXT:    [[TMP9:%.*]] = load <4 x i8>, <4 x i8>* [[TMP8]], align 1
; CHECK-NEXT:    [[TMP10:%.*]] = zext <4 x i8> [[TMP9]] to <4 x i32>
; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i8* [[ARRAYIDX5]] to <4 x i8>*
; CHECK-NEXT:    [[TMP12:%.*]] = load <4 x i8>, <4 x i8>* [[TMP11]], align 1
; CHECK-NEXT:    [[TMP13:%.*]] = zext <4 x i8> [[TMP12]] to <4 x i32>
; CHECK-NEXT:    [[TMP14:%.*]] = mul nuw nsw <4 x i32> [[TMP10]], [[TMP13]]
; CHECK-NEXT:    [[TMP15:%.*]] = bitcast i32* [[DST4]] to <4 x i32>*
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i8* [[ADD_PTR]] to <4 x i8>*
; CHECK-NEXT:    [[TMP17:%.*]] = load <4 x i8>, <4 x i8>* [[TMP16]], align 1
; CHECK-NEXT:    [[TMP18:%.*]] = zext <4 x i8> [[TMP17]] to <4 x i32>
; CHECK-NEXT:    [[TMP19:%.*]] = bitcast i8* [[ARRAYIDX3_1]] to <4 x i8>*
; CHECK-NEXT:    [[TMP20:%.*]] = load <4 x i8>, <4 x i8>* [[TMP19]], align 1
; CHECK-NEXT:    [[TMP21:%.*]] = zext <4 x i8> [[TMP20]] to <4 x i32>
; CHECK-NEXT:    [[TMP22:%.*]] = mul nuw nsw <4 x i32> [[TMP18]], [[TMP21]]
; CHECK-NEXT:    [[TMP23:%.*]] = bitcast i32* [[DST8]] to <4 x i32>*
; CHECK-NEXT:    [[TMP24:%.*]] = bitcast i8* [[ADD_PTR64]] to <4 x i8>*
; CHECK-NEXT:    [[TMP25:%.*]] = load <4 x i8>, <4 x i8>* [[TMP24]], align 1
; CHECK-NEXT:    [[TMP26:%.*]] = zext <4 x i8> [[TMP25]] to <4 x i32>
; CHECK-NEXT:    [[TMP27:%.*]] = bitcast i8* [[ARRAYIDX5_1]] to <4 x i8>*
; CHECK-NEXT:    [[TMP28:%.*]] = load <4 x i8>, <4 x i8>* [[TMP27]], align 1
; CHECK-NEXT:    [[TMP29:%.*]] = zext <4 x i8> [[TMP28]] to <4 x i32>
; CHECK-NEXT:    [[TMP30:%.*]] = mul nuw nsw <4 x i32> [[TMP26]], [[TMP29]]
; CHECK-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* [[TMP7]], align 4
; CHECK-NEXT:    store <4 x i32> [[TMP14]], <4 x i32>* [[TMP15]], align 4
; CHECK-NEXT:    store <4 x i32> [[TMP22]], <4 x i32>* [[TMP23]], align 4
; CHECK-NEXT:    [[TMP31:%.*]] = bitcast i32* [[DST12]] to <4 x i32>*
; CHECK-NEXT:    store <4 x i32> [[TMP30]], <4 x i32>* [[TMP31]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %idx.ext = sext i32 %off1 to i64
  %idx.ext63 = sext i32 %off2 to i64

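  ; Scalar reference: each of four rows (stepping by off1 bytes on the p1
  ; side and off2 bytes on the p2 side) loads two groups of four
  ; consecutive bytes and zero-extends them to i32.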
  %0 = load i8, i8* %p1, align 1
  %conv = zext i8 %0 to i32
  %1 = load i8, i8* %p2, align 1
  %conv2 = zext i8 %1 to i32
  %arrayidx3 = getelementptr inbounds i8, i8* %p1, i64 4
  %2 = load i8, i8* %arrayidx3, align 1
  %conv4 = zext i8 %2 to i32
  %arrayidx5 = getelementptr inbounds i8, i8* %p2, i64 4
  %3 = load i8, i8* %arrayidx5, align 1
  %conv6 = zext i8 %3 to i32
  %arrayidx8 = getelementptr inbounds i8, i8* %p1, i64 1
  %4 = load i8, i8* %arrayidx8, align 1
  %conv9 = zext i8 %4 to i32
  %arrayidx10 = getelementptr inbounds i8, i8* %p2, i64 1
  %5 = load i8, i8* %arrayidx10, align 1
  %conv11 = zext i8 %5 to i32
  %arrayidx13 = getelementptr inbounds i8, i8* %p1, i64 5
  %6 = load i8, i8* %arrayidx13, align 1
  %conv14 = zext i8 %6 to i32
  %arrayidx15 = getelementptr inbounds i8, i8* %p2, i64 5
  %7 = load i8, i8* %arrayidx15, align 1
  %conv16 = zext i8 %7 to i32
  %arrayidx20 = getelementptr inbounds i8, i8* %p1, i64 2
  %8 = load i8, i8* %arrayidx20, align 1
  %conv21 = zext i8 %8 to i32
  %arrayidx22 = getelementptr inbounds i8, i8* %p2, i64 2
  %9 = load i8, i8* %arrayidx22, align 1
  %conv23 = zext i8 %9 to i32
  %arrayidx25 = getelementptr inbounds i8, i8* %p1, i64 6
  %10 = load i8, i8* %arrayidx25, align 1
  %conv26 = zext i8 %10 to i32
  %arrayidx27 = getelementptr inbounds i8, i8* %p2, i64 6
  %11 = load i8, i8* %arrayidx27, align 1
  %conv28 = zext i8 %11 to i32
  %arrayidx32 = getelementptr inbounds i8, i8* %p1, i64 3
  %12 = load i8, i8* %arrayidx32, align 1
  %conv33 = zext i8 %12 to i32
  %arrayidx34 = getelementptr inbounds i8, i8* %p2, i64 3
  %13 = load i8, i8* %arrayidx34, align 1
  %conv35 = zext i8 %13 to i32
  %arrayidx37 = getelementptr inbounds i8, i8* %p1, i64 7
  %14 = load i8, i8* %arrayidx37, align 1
  %conv38 = zext i8 %14 to i32
  %arrayidx39 = getelementptr inbounds i8, i8* %p2, i64 7
  %15 = load i8, i8* %arrayidx39, align 1
  %conv40 = zext i8 %15 to i32
  %add.ptr = getelementptr inbounds i8, i8* %p1, i64 %idx.ext
  %16 = load i8, i8* %add.ptr, align 1
  %conv.1 = zext i8 %16 to i32
  %add.ptr64 = getelementptr inbounds i8, i8* %p2, i64 %idx.ext63
  %17 = load i8, i8* %add.ptr64, align 1
  %conv2.1 = zext i8 %17 to i32
  %arrayidx3.1 = getelementptr inbounds i8, i8* %add.ptr, i64 4
  %18 = load i8, i8* %arrayidx3.1, align 1
  %conv4.1 = zext i8 %18 to i32
  %arrayidx5.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 4
  %19 = load i8, i8* %arrayidx5.1, align 1
  %conv6.1 = zext i8 %19 to i32
  %arrayidx8.1 = getelementptr inbounds i8, i8* %add.ptr, i64 1
  %20 = load i8, i8* %arrayidx8.1, align 1
  %conv9.1 = zext i8 %20 to i32
  %arrayidx10.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 1
  %21 = load i8, i8* %arrayidx10.1, align 1
  %conv11.1 = zext i8 %21 to i32
  %arrayidx13.1 = getelementptr inbounds i8, i8* %add.ptr, i64 5
  %22 = load i8, i8* %arrayidx13.1, align 1
  %conv14.1 = zext i8 %22 to i32
  %arrayidx15.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 5
  %23 = load i8, i8* %arrayidx15.1, align 1
  %conv16.1 = zext i8 %23 to i32
  %arrayidx20.1 = getelementptr inbounds i8, i8* %add.ptr, i64 2
  %24 = load i8, i8* %arrayidx20.1, align 1
  %conv21.1 = zext i8 %24 to i32
  %arrayidx22.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 2
  %25 = load i8, i8* %arrayidx22.1, align 1
  %conv23.1 = zext i8 %25 to i32
  %arrayidx25.1 = getelementptr inbounds i8, i8* %add.ptr, i64 6
  %26 = load i8, i8* %arrayidx25.1, align 1
  %conv26.1 = zext i8 %26 to i32
  %arrayidx27.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 6
  %27 = load i8, i8* %arrayidx27.1, align 1
  %conv28.1 = zext i8 %27 to i32
  %arrayidx32.1 = getelementptr inbounds i8, i8* %add.ptr, i64 3
  %28 = load i8, i8* %arrayidx32.1, align 1
  %conv33.1 = zext i8 %28 to i32
  %arrayidx34.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 3
  %29 = load i8, i8* %arrayidx34.1, align 1
  %conv35.1 = zext i8 %29 to i32
  %arrayidx37.1 = getelementptr inbounds i8, i8* %add.ptr, i64 7
  %30 = load i8, i8* %arrayidx37.1, align 1
  %conv38.1 = zext i8 %30 to i32
  %arrayidx39.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 7
  %31 = load i8, i8* %arrayidx39.1, align 1
  %conv40.1 = zext i8 %31 to i32
  %add.ptr.1 = getelementptr inbounds i8, i8* %add.ptr, i64 %idx.ext
  %32 = load i8, i8* %add.ptr.1, align 1
  %conv.2 = zext i8 %32 to i32
  %add.ptr64.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 %idx.ext63
  %33 = load i8, i8* %add.ptr64.1, align 1
  %conv2.2 = zext i8 %33 to i32
  %arrayidx3.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 4
  %34 = load i8, i8* %arrayidx3.2, align 1
  %conv4.2 = zext i8 %34 to i32
  %arrayidx5.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 4
  %35 = load i8, i8* %arrayidx5.2, align 1
  %conv6.2 = zext i8 %35 to i32
  %arrayidx8.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 1
  %36 = load i8, i8* %arrayidx8.2, align 1
  %conv9.2 = zext i8 %36 to i32
  %arrayidx10.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 1
  %37 = load i8, i8* %arrayidx10.2, align 1
  %conv11.2 = zext i8 %37 to i32
  %arrayidx13.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 5
  %38 = load i8, i8* %arrayidx13.2, align 1
  %conv14.2 = zext i8 %38 to i32
  %arrayidx15.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 5
  %39 = load i8, i8* %arrayidx15.2, align 1
  %conv16.2 = zext i8 %39 to i32
  %arrayidx20.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 2
  %40 = load i8, i8* %arrayidx20.2, align 1
  %conv21.2 = zext i8 %40 to i32
  %arrayidx22.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 2
  %41 = load i8, i8* %arrayidx22.2, align 1
  %conv23.2 = zext i8 %41 to i32
  %arrayidx25.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 6
  %42 = load i8, i8* %arrayidx25.2, align 1
  %conv26.2 = zext i8 %42 to i32
  %arrayidx27.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 6
  %43 = load i8, i8* %arrayidx27.2, align 1
  %conv28.2 = zext i8 %43 to i32
  %arrayidx32.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 3
  %44 = load i8, i8* %arrayidx32.2, align 1
  %conv33.2 = zext i8 %44 to i32
  %arrayidx34.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 3
  %45 = load i8, i8* %arrayidx34.2, align 1
  %conv35.2 = zext i8 %45 to i32
  %arrayidx37.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 7
  %46 = load i8, i8* %arrayidx37.2, align 1
  %conv38.2 = zext i8 %46 to i32
  %arrayidx39.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 7
  %47 = load i8, i8* %arrayidx39.2, align 1
  %conv40.2 = zext i8 %47 to i32
  %add.ptr.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 %idx.ext
  %48 = load i8, i8* %add.ptr.2, align 1
  %conv.3 = zext i8 %48 to i32
  %add.ptr64.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 %idx.ext63
  %49 = load i8, i8* %add.ptr64.2, align 1
  %conv2.3 = zext i8 %49 to i32
  %arrayidx3.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 4
  %50 = load i8, i8* %arrayidx3.3, align 1
  %conv4.3 = zext i8 %50 to i32
  %arrayidx5.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 4
  %51 = load i8, i8* %arrayidx5.3, align 1
  %conv6.3 = zext i8 %51 to i32
  %arrayidx8.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 1
  %52 = load i8, i8* %arrayidx8.3, align 1
  %conv9.3 = zext i8 %52 to i32
  %arrayidx10.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 1
  %53 = load i8, i8* %arrayidx10.3, align 1
  %conv11.3 = zext i8 %53 to i32
  %arrayidx13.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 5
  %54 = load i8, i8* %arrayidx13.3, align 1
  %conv14.3 = zext i8 %54 to i32
  %arrayidx15.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 5
  %55 = load i8, i8* %arrayidx15.3, align 1
  %conv16.3 = zext i8 %55 to i32
  %arrayidx20.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 2
  %56 = load i8, i8* %arrayidx20.3, align 1
  %conv21.3 = zext i8 %56 to i32
  %arrayidx22.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 2
  %57 = load i8, i8* %arrayidx22.3, align 1
  %conv23.3 = zext i8 %57 to i32
  %arrayidx25.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 6
  %58 = load i8, i8* %arrayidx25.3, align 1
  %conv26.3 = zext i8 %58 to i32
  %arrayidx27.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 6
  %59 = load i8, i8* %arrayidx27.3, align 1
  %conv28.3 = zext i8 %59 to i32
  %arrayidx32.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 3
  %60 = load i8, i8* %arrayidx32.3, align 1
  %conv33.3 = zext i8 %60 to i32
  %arrayidx34.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 3
  %61 = load i8, i8* %arrayidx34.3, align 1
  %conv35.3 = zext i8 %61 to i32
  %arrayidx37.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 7
  %62 = load i8, i8* %arrayidx37.3, align 1
  %conv38.3 = zext i8 %62 to i32
  %arrayidx39.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 7
  %63 = load i8, i8* %arrayidx39.3, align 1
  %conv40.3 = zext i8 %63 to i32

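  ; Only the row-0 and row-1 values feed the multiplies below; the .2/.3
  ; row loads above are dead, which is why only four <4 x i8> vector loads
  ; per side appear in the CHECK lines.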
  %m1 = mul i32 %conv, %conv4
  %m2 = mul i32 %conv9, %conv14
  %m3 = mul i32 %conv21, %conv26
  %m4 = mul i32 %conv33, %conv38
  %m5 = mul i32 %conv2, %conv6
  %m6 = mul i32 %conv11, %conv16
  %m7 = mul i32 %conv23, %conv28
  %m8 = mul i32 %conv35, %conv40
  %m9 = mul i32 %conv.1, %conv4.1
  %m10 = mul i32 %conv9.1, %conv14.1
  %m11 = mul i32 %conv21.1, %conv26.1
  %m12 = mul i32 %conv33.1, %conv38.1
  %m13 = mul i32 %conv2.1, %conv6.1
  %m14 = mul i32 %conv11.1, %conv16.1
  %m15 = mul i32 %conv23.1, %conv28.1
  %m16 = mul i32 %conv35.1, %conv40.1

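  ; Sixteen consecutive i32 stores into dst0; together with the products
  ; above they vectorize into four <4 x i32> mul/store pairs at element
  ; offsets 0, 4, 8 and 12 (see the CHECK lines).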
  %dst1 = getelementptr inbounds i32, i32* %dst0, i64 1
  %dst2 = getelementptr inbounds i32, i32* %dst0, i64 2
  %dst3 = getelementptr inbounds i32, i32* %dst0, i64 3
  %dst4 = getelementptr inbounds i32, i32* %dst0, i64 4
  %dst5 = getelementptr inbounds i32, i32* %dst0, i64 5
  %dst6 = getelementptr inbounds i32, i32* %dst0, i64 6
  %dst7 = getelementptr inbounds i32, i32* %dst0, i64 7
  %dst8 = getelementptr inbounds i32, i32* %dst0, i64 8
  %dst9 = getelementptr inbounds i32, i32* %dst0, i64 9
  %dst10 = getelementptr inbounds i32, i32* %dst0, i64 10
  %dst11 = getelementptr inbounds i32, i32* %dst0, i64 11
  %dst12 = getelementptr inbounds i32, i32* %dst0, i64 12
  %dst13 = getelementptr inbounds i32, i32* %dst0, i64 13
  %dst14 = getelementptr inbounds i32, i32* %dst0, i64 14
  %dst15 = getelementptr inbounds i32, i32* %dst0, i64 15
  store i32 %m1, i32* %dst0
  store i32 %m2, i32* %dst1
  store i32 %m3, i32* %dst2
  store i32 %m4, i32* %dst3
  store i32 %m5, i32* %dst4
  store i32 %m6, i32* %dst5
  store i32 %m7, i32* %dst6
  store i32 %m8, i32* %dst7
  store i32 %m9, i32* %dst8
  store i32 %m10, i32* %dst9
  store i32 %m11, i32* %dst10
  store i32 %m12, i32* %dst11
  store i32 %m13, i32* %dst12
  store i32 %m14, i32* %dst13
  store i32 %m15, i32* %dst14
  store i32 %m16, i32* %dst15
  ret void
}

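; @full resembles a 4x4-block SATD kernel: byte differences from the p1/p2
; sides are packed two per i32 (low and high 16 bits, via "shl ..., 16" plus
; "add"), run through Hadamard-style add/sub butterflies, and the packed
; absolute values are formed with the lshr-15 / and-65537 / mul-65535 / xor
; bit trick before everything is summed; the final fold adds the two 16-bit
; halves of the sum and halves the result.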
define dso_local i32 @full(i8* nocapture noundef readonly %p1, i32 noundef %st1, i8* nocapture noundef readonly %p2, i32 noundef %st2) {
; CHECK-LABEL: @full(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[IDX_EXT:%.*]] = sext i32 [[ST1:%.*]] to i64
; CHECK-NEXT:    [[IDX_EXT63:%.*]] = sext i32 [[ST2:%.*]] to i64
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[P1:%.*]], i64 4
; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, i8* [[P2:%.*]], i64 4
; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[P1]], i64 [[IDX_EXT]]
; CHECK-NEXT:    [[ADD_PTR64:%.*]] = getelementptr inbounds i8, i8* [[P2]], i64 [[IDX_EXT63]]
; CHECK-NEXT:    [[ARRAYIDX3_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR]], i64 4
; CHECK-NEXT:    [[ARRAYIDX5_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64]], i64 4
; CHECK-NEXT:    [[ADD_PTR_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR]], i64 [[IDX_EXT]]
; CHECK-NEXT:    [[ADD_PTR64_1:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64]], i64 [[IDX_EXT63]]
; CHECK-NEXT:    [[ARRAYIDX3_2:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR_1]], i64 4
; CHECK-NEXT:    [[ARRAYIDX5_2:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64_1]], i64 4
; CHECK-NEXT:    [[ADD_PTR_2:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR_1]], i64 [[IDX_EXT]]
; CHECK-NEXT:    [[ADD_PTR64_2:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64_1]], i64 [[IDX_EXT63]]
; CHECK-NEXT:    [[ARRAYIDX3_3:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR_2]], i64 4
; CHECK-NEXT:    [[ARRAYIDX5_3:%.*]] = getelementptr inbounds i8, i8* [[ADD_PTR64_2]], i64 4
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[P1]] to <4 x i8>*
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* [[TMP0]], align 1
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[P2]] to <4 x i8>*
; CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i8>, <4 x i8>* [[TMP2]], align 1
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8* [[ARRAYIDX3]] to <4 x i8>*
; CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i8>, <4 x i8>* [[TMP4]], align 1
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i8* [[ARRAYIDX5]] to <4 x i8>*
; CHECK-NEXT:    [[TMP7:%.*]] = load <4 x i8>, <4 x i8>* [[TMP6]], align 1
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i8* [[ADD_PTR]] to <4 x i8>*
; CHECK-NEXT:    [[TMP9:%.*]] = load <4 x i8>, <4 x i8>* [[TMP8]], align 1
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i8* [[ADD_PTR64]] to <4 x i8>*
; CHECK-NEXT:    [[TMP11:%.*]] = load <4 x i8>, <4 x i8>* [[TMP10]], align 1
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i8* [[ARRAYIDX3_1]] to <4 x i8>*
; CHECK-NEXT:    [[TMP13:%.*]] = load <4 x i8>, <4 x i8>* [[TMP12]], align 1
; CHECK-NEXT:    [[TMP14:%.*]] = bitcast i8* [[ARRAYIDX5_1]] to <4 x i8>*
; CHECK-NEXT:    [[TMP15:%.*]] = load <4 x i8>, <4 x i8>* [[TMP14]], align 1
; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i8* [[ADD_PTR_1]] to <4 x i8>*
; CHECK-NEXT:    [[TMP17:%.*]] = load <4 x i8>, <4 x i8>* [[TMP16]], align 1
; CHECK-NEXT:    [[TMP18:%.*]] = bitcast i8* [[ADD_PTR64_1]] to <4 x i8>*
; CHECK-NEXT:    [[TMP19:%.*]] = load <4 x i8>, <4 x i8>* [[TMP18]], align 1
; CHECK-NEXT:    [[TMP20:%.*]] = bitcast i8* [[ARRAYIDX3_2]] to <4 x i8>*
; CHECK-NEXT:    [[TMP21:%.*]] = load <4 x i8>, <4 x i8>* [[TMP20]], align 1
; CHECK-NEXT:    [[TMP22:%.*]] = bitcast i8* [[ARRAYIDX5_2]] to <4 x i8>*
; CHECK-NEXT:    [[TMP23:%.*]] = load <4 x i8>, <4 x i8>* [[TMP22]], align 1
; CHECK-NEXT:    [[TMP24:%.*]] = bitcast i8* [[ADD_PTR_2]] to <4 x i8>*
; CHECK-NEXT:    [[TMP25:%.*]] = load <4 x i8>, <4 x i8>* [[TMP24]], align 1
; CHECK-NEXT:    [[TMP26:%.*]] = shufflevector <4 x i8> [[TMP25]], <4 x i8> [[TMP17]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP27:%.*]] = shufflevector <4 x i8> [[TMP9]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP28:%.*]] = shufflevector <16 x i8> [[TMP26]], <16 x i8> [[TMP27]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP29:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP30:%.*]] = shufflevector <16 x i8> [[TMP28]], <16 x i8> [[TMP29]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; CHECK-NEXT:    [[TMP31:%.*]] = zext <16 x i8> [[TMP30]] to <16 x i32>
; CHECK-NEXT:    [[TMP32:%.*]] = bitcast i8* [[ADD_PTR64_2]] to <4 x i8>*
; CHECK-NEXT:    [[TMP33:%.*]] = load <4 x i8>, <4 x i8>* [[TMP32]], align 1
; CHECK-NEXT:    [[TMP34:%.*]] = shufflevector <4 x i8> [[TMP33]], <4 x i8> [[TMP19]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP35:%.*]] = shufflevector <4 x i8> [[TMP11]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP36:%.*]] = shufflevector <16 x i8> [[TMP34]], <16 x i8> [[TMP35]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP37:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP38:%.*]] = shufflevector <16 x i8> [[TMP36]], <16 x i8> [[TMP37]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; CHECK-NEXT:    [[TMP39:%.*]] = zext <16 x i8> [[TMP38]] to <16 x i32>
; CHECK-NEXT:    [[TMP40:%.*]] = sub nsw <16 x i32> [[TMP31]], [[TMP39]]
; CHECK-NEXT:    [[TMP41:%.*]] = bitcast i8* [[ARRAYIDX3_3]] to <4 x i8>*
; CHECK-NEXT:    [[TMP42:%.*]] = load <4 x i8>, <4 x i8>* [[TMP41]], align 1
; CHECK-NEXT:    [[TMP43:%.*]] = shufflevector <4 x i8> [[TMP42]], <4 x i8> [[TMP21]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP44:%.*]] = shufflevector <4 x i8> [[TMP13]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP45:%.*]] = shufflevector <16 x i8> [[TMP43]], <16 x i8> [[TMP44]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP46:%.*]] = shufflevector <4 x i8> [[TMP5]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP47:%.*]] = shufflevector <16 x i8> [[TMP45]], <16 x i8> [[TMP46]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; CHECK-NEXT:    [[TMP48:%.*]] = zext <16 x i8> [[TMP47]] to <16 x i32>
; CHECK-NEXT:    [[TMP49:%.*]] = bitcast i8* [[ARRAYIDX5_3]] to <4 x i8>*
; CHECK-NEXT:    [[TMP50:%.*]] = load <4 x i8>, <4 x i8>* [[TMP49]], align 1
; CHECK-NEXT:    [[TMP51:%.*]] = shufflevector <4 x i8> [[TMP50]], <4 x i8> [[TMP23]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP52:%.*]] = shufflevector <4 x i8> [[TMP15]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP53:%.*]] = shufflevector <16 x i8> [[TMP51]], <16 x i8> [[TMP52]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP54:%.*]] = shufflevector <4 x i8> [[TMP7]], <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; CHECK-NEXT:    [[TMP55:%.*]] = shufflevector <16 x i8> [[TMP53]], <16 x i8> [[TMP54]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19>
; CHECK-NEXT:    [[TMP56:%.*]] = zext <16 x i8> [[TMP55]] to <16 x i32>
; CHECK-NEXT:    [[TMP57:%.*]] = sub nsw <16 x i32> [[TMP48]], [[TMP56]]
; CHECK-NEXT:    [[TMP58:%.*]] = shl nsw <16 x i32> [[TMP57]], <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
; CHECK-NEXT:    [[TMP59:%.*]] = add nsw <16 x i32> [[TMP58]], [[TMP40]]
; CHECK-NEXT:    [[TMP60:%.*]] = shufflevector <16 x i32> [[TMP59]], <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
; CHECK-NEXT:    [[TMP61:%.*]] = add nsw <16 x i32> [[TMP59]], [[TMP60]]
; CHECK-NEXT:    [[TMP62:%.*]] = sub nsw <16 x i32> [[TMP59]], [[TMP60]]
; CHECK-NEXT:    [[TMP63:%.*]] = shufflevector <16 x i32> [[TMP61]], <16 x i32> [[TMP62]], <16 x i32> <i32 3, i32 7, i32 11, i32 15, i32 22, i32 18, i32 26, i32 30, i32 5, i32 1, i32 9, i32 13, i32 20, i32 16, i32 24, i32 28>
; CHECK-NEXT:    [[TMP64:%.*]] = shufflevector <16 x i32> [[TMP63]], <16 x i32> poison, <16 x i32> <i32 9, i32 8, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 1, i32 0, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    [[TMP65:%.*]] = add nsw <16 x i32> [[TMP63]], [[TMP64]]
; CHECK-NEXT:    [[TMP66:%.*]] = sub nsw <16 x i32> [[TMP63]], [[TMP64]]
; CHECK-NEXT:    [[TMP67:%.*]] = shufflevector <16 x i32> [[TMP65]], <16 x i32> [[TMP66]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; CHECK-NEXT:    [[TMP68:%.*]] = shufflevector <16 x i32> [[TMP67]], <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
; CHECK-NEXT:    [[TMP69:%.*]] = add nsw <16 x i32> [[TMP67]], [[TMP68]]
; CHECK-NEXT:    [[TMP70:%.*]] = sub nsw <16 x i32> [[TMP67]], [[TMP68]]
; CHECK-NEXT:    [[TMP71:%.*]] = shufflevector <16 x i32> [[TMP69]], <16 x i32> [[TMP70]], <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 20, i32 5, i32 6, i32 23, i32 24, i32 9, i32 10, i32 27, i32 28, i32 13, i32 14, i32 31>
; CHECK-NEXT:    [[TMP72:%.*]] = shufflevector <16 x i32> [[TMP71]], <16 x i32> poison, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
; CHECK-NEXT:    [[TMP73:%.*]] = add nsw <16 x i32> [[TMP71]], [[TMP72]]
; CHECK-NEXT:    [[TMP74:%.*]] = sub nsw <16 x i32> [[TMP71]], [[TMP72]]
; CHECK-NEXT:    [[TMP75:%.*]] = shufflevector <16 x i32> [[TMP73]], <16 x i32> [[TMP74]], <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
; CHECK-NEXT:    [[TMP76:%.*]] = lshr <16 x i32> [[TMP75]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
; CHECK-NEXT:    [[TMP77:%.*]] = and <16 x i32> [[TMP76]], <i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537>
; CHECK-NEXT:    [[TMP78:%.*]] = mul nuw <16 x i32> [[TMP77]], <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
; CHECK-NEXT:    [[TMP79:%.*]] = add <16 x i32> [[TMP78]], [[TMP75]]
; CHECK-NEXT:    [[TMP80:%.*]] = xor <16 x i32> [[TMP79]], [[TMP78]]
; CHECK-NEXT:    [[TMP81:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP80]])
; CHECK-NEXT:    [[CONV118:%.*]] = and i32 [[TMP81]], 65535
; CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[TMP81]], 16
; CHECK-NEXT:    [[ADD119:%.*]] = add nuw nsw i32 [[CONV118]], [[SHR]]
; CHECK-NEXT:    [[SHR120:%.*]] = lshr i32 [[ADD119]], 1
; CHECK-NEXT:    ret i32 [[SHR120]]
;
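; The scalar form below repeats the same row pattern four times; the CHECK
; lines above show it collapsing into <16 x i32> arithmetic with a single
; llvm.vector.reduce.add.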
entry:
  %idx.ext = sext i32 %st1 to i64
  %idx.ext63 = sext i32 %st2 to i64
  %0 = load i8, i8* %p1, align 1
  %conv = zext i8 %0 to i32
  %1 = load i8, i8* %p2, align 1
  %conv2 = zext i8 %1 to i32
  %sub = sub nsw i32 %conv, %conv2
  %arrayidx3 = getelementptr inbounds i8, i8* %p1, i64 4
  %2 = load i8, i8* %arrayidx3, align 1
  %conv4 = zext i8 %2 to i32
  %arrayidx5 = getelementptr inbounds i8, i8* %p2, i64 4
  %3 = load i8, i8* %arrayidx5, align 1
  %conv6 = zext i8 %3 to i32
  %sub7 = sub nsw i32 %conv4, %conv6
  %shl = shl nsw i32 %sub7, 16
  %add = add nsw i32 %shl, %sub
  %arrayidx8 = getelementptr inbounds i8, i8* %p1, i64 1
  %4 = load i8, i8* %arrayidx8, align 1
  %conv9 = zext i8 %4 to i32
  %arrayidx10 = getelementptr inbounds i8, i8* %p2, i64 1
  %5 = load i8, i8* %arrayidx10, align 1
  %conv11 = zext i8 %5 to i32
  %sub12 = sub nsw i32 %conv9, %conv11
  %arrayidx13 = getelementptr inbounds i8, i8* %p1, i64 5
  %6 = load i8, i8* %arrayidx13, align 1
  %conv14 = zext i8 %6 to i32
  %arrayidx15 = getelementptr inbounds i8, i8* %p2, i64 5
  %7 = load i8, i8* %arrayidx15, align 1
  %conv16 = zext i8 %7 to i32
  %sub17 = sub nsw i32 %conv14, %conv16
  %shl18 = shl nsw i32 %sub17, 16
  %add19 = add nsw i32 %shl18, %sub12
  %arrayidx20 = getelementptr inbounds i8, i8* %p1, i64 2
  %8 = load i8, i8* %arrayidx20, align 1
  %conv21 = zext i8 %8 to i32
  %arrayidx22 = getelementptr inbounds i8, i8* %p2, i64 2
  %9 = load i8, i8* %arrayidx22, align 1
  %conv23 = zext i8 %9 to i32
  %sub24 = sub nsw i32 %conv21, %conv23
  %arrayidx25 = getelementptr inbounds i8, i8* %p1, i64 6
  %10 = load i8, i8* %arrayidx25, align 1
  %conv26 = zext i8 %10 to i32
  %arrayidx27 = getelementptr inbounds i8, i8* %p2, i64 6
  %11 = load i8, i8* %arrayidx27, align 1
  %conv28 = zext i8 %11 to i32
  %sub29 = sub nsw i32 %conv26, %conv28
  %shl30 = shl nsw i32 %sub29, 16
  %add31 = add nsw i32 %shl30, %sub24
  %arrayidx32 = getelementptr inbounds i8, i8* %p1, i64 3
  %12 = load i8, i8* %arrayidx32, align 1
  %conv33 = zext i8 %12 to i32
  %arrayidx34 = getelementptr inbounds i8, i8* %p2, i64 3
  %13 = load i8, i8* %arrayidx34, align 1
  %conv35 = zext i8 %13 to i32
  %sub36 = sub nsw i32 %conv33, %conv35
  %arrayidx37 = getelementptr inbounds i8, i8* %p1, i64 7
  %14 = load i8, i8* %arrayidx37, align 1
  %conv38 = zext i8 %14 to i32
  %arrayidx39 = getelementptr inbounds i8, i8* %p2, i64 7
  %15 = load i8, i8* %arrayidx39, align 1
  %conv40 = zext i8 %15 to i32
  %sub41 = sub nsw i32 %conv38, %conv40
  %shl42 = shl nsw i32 %sub41, 16
  %add43 = add nsw i32 %shl42, %sub36
  %add44 = add nsw i32 %add19, %add
  %sub45 = sub nsw i32 %add, %add19
  %add46 = add nsw i32 %add43, %add31
  %sub47 = sub nsw i32 %add31, %add43
  %add48 = add nsw i32 %add46, %add44
  %sub51 = sub nsw i32 %add44, %add46
  %add55 = add nsw i32 %sub47, %sub45
  %sub59 = sub nsw i32 %sub45, %sub47
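  ; End of row 0: %add48/%sub51/%add55/%sub59 hold the four butterfly
  ; outputs. The pointers now advance by one row (st1/st2 bytes) and the
  ; same pattern repeats for rows 1-3.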
  %add.ptr = getelementptr inbounds i8, i8* %p1, i64 %idx.ext
  %add.ptr64 = getelementptr inbounds i8, i8* %p2, i64 %idx.ext63
  %16 = load i8, i8* %add.ptr, align 1
  %conv.1 = zext i8 %16 to i32
  %17 = load i8, i8* %add.ptr64, align 1
  %conv2.1 = zext i8 %17 to i32
  %sub.1 = sub nsw i32 %conv.1, %conv2.1
  %arrayidx3.1 = getelementptr inbounds i8, i8* %add.ptr, i64 4
  %18 = load i8, i8* %arrayidx3.1, align 1
  %conv4.1 = zext i8 %18 to i32
  %arrayidx5.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 4
  %19 = load i8, i8* %arrayidx5.1, align 1
  %conv6.1 = zext i8 %19 to i32
  %sub7.1 = sub nsw i32 %conv4.1, %conv6.1
  %shl.1 = shl nsw i32 %sub7.1, 16
  %add.1 = add nsw i32 %shl.1, %sub.1
  %arrayidx8.1 = getelementptr inbounds i8, i8* %add.ptr, i64 1
  %20 = load i8, i8* %arrayidx8.1, align 1
  %conv9.1 = zext i8 %20 to i32
  %arrayidx10.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 1
  %21 = load i8, i8* %arrayidx10.1, align 1
  %conv11.1 = zext i8 %21 to i32
  %sub12.1 = sub nsw i32 %conv9.1, %conv11.1
  %arrayidx13.1 = getelementptr inbounds i8, i8* %add.ptr, i64 5
  %22 = load i8, i8* %arrayidx13.1, align 1
  %conv14.1 = zext i8 %22 to i32
  %arrayidx15.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 5
  %23 = load i8, i8* %arrayidx15.1, align 1
  %conv16.1 = zext i8 %23 to i32
  %sub17.1 = sub nsw i32 %conv14.1, %conv16.1
  %shl18.1 = shl nsw i32 %sub17.1, 16
  %add19.1 = add nsw i32 %shl18.1, %sub12.1
  %arrayidx20.1 = getelementptr inbounds i8, i8* %add.ptr, i64 2
  %24 = load i8, i8* %arrayidx20.1, align 1
  %conv21.1 = zext i8 %24 to i32
  %arrayidx22.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 2
  %25 = load i8, i8* %arrayidx22.1, align 1
  %conv23.1 = zext i8 %25 to i32
  %sub24.1 = sub nsw i32 %conv21.1, %conv23.1
  %arrayidx25.1 = getelementptr inbounds i8, i8* %add.ptr, i64 6
  %26 = load i8, i8* %arrayidx25.1, align 1
  %conv26.1 = zext i8 %26 to i32
  %arrayidx27.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 6
  %27 = load i8, i8* %arrayidx27.1, align 1
  %conv28.1 = zext i8 %27 to i32
  %sub29.1 = sub nsw i32 %conv26.1, %conv28.1
  %shl30.1 = shl nsw i32 %sub29.1, 16
  %add31.1 = add nsw i32 %shl30.1, %sub24.1
  %arrayidx32.1 = getelementptr inbounds i8, i8* %add.ptr, i64 3
  %28 = load i8, i8* %arrayidx32.1, align 1
  %conv33.1 = zext i8 %28 to i32
  %arrayidx34.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 3
  %29 = load i8, i8* %arrayidx34.1, align 1
  %conv35.1 = zext i8 %29 to i32
  %sub36.1 = sub nsw i32 %conv33.1, %conv35.1
  %arrayidx37.1 = getelementptr inbounds i8, i8* %add.ptr, i64 7
  %30 = load i8, i8* %arrayidx37.1, align 1
  %conv38.1 = zext i8 %30 to i32
  %arrayidx39.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 7
  %31 = load i8, i8* %arrayidx39.1, align 1
  %conv40.1 = zext i8 %31 to i32
  %sub41.1 = sub nsw i32 %conv38.1, %conv40.1
  %shl42.1 = shl nsw i32 %sub41.1, 16
  %add43.1 = add nsw i32 %shl42.1, %sub36.1
  %add44.1 = add nsw i32 %add19.1, %add.1
  %sub45.1 = sub nsw i32 %add.1, %add19.1
  %add46.1 = add nsw i32 %add43.1, %add31.1
  %sub47.1 = sub nsw i32 %add31.1, %add43.1
  %add48.1 = add nsw i32 %add46.1, %add44.1
  %sub51.1 = sub nsw i32 %add44.1, %add46.1
  %add55.1 = add nsw i32 %sub47.1, %sub45.1
  %sub59.1 = sub nsw i32 %sub45.1, %sub47.1
  %add.ptr.1 = getelementptr inbounds i8, i8* %add.ptr, i64 %idx.ext
  %add.ptr64.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 %idx.ext63
  %32 = load i8, i8* %add.ptr.1, align 1
  %conv.2 = zext i8 %32 to i32
  %33 = load i8, i8* %add.ptr64.1, align 1
  %conv2.2 = zext i8 %33 to i32
  %sub.2 = sub nsw i32 %conv.2, %conv2.2
  %arrayidx3.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 4
  %34 = load i8, i8* %arrayidx3.2, align 1
  %conv4.2 = zext i8 %34 to i32
  %arrayidx5.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 4
  %35 = load i8, i8* %arrayidx5.2, align 1
  %conv6.2 = zext i8 %35 to i32
  %sub7.2 = sub nsw i32 %conv4.2, %conv6.2
  %shl.2 = shl nsw i32 %sub7.2, 16
  %add.2 = add nsw i32 %shl.2, %sub.2
  %arrayidx8.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 1
  %36 = load i8, i8* %arrayidx8.2, align 1
  %conv9.2 = zext i8 %36 to i32
  %arrayidx10.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 1
  %37 = load i8, i8* %arrayidx10.2, align 1
  %conv11.2 = zext i8 %37 to i32
  %sub12.2 = sub nsw i32 %conv9.2, %conv11.2
  %arrayidx13.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 5
  %38 = load i8, i8* %arrayidx13.2, align 1
  %conv14.2 = zext i8 %38 to i32
  %arrayidx15.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 5
  %39 = load i8, i8* %arrayidx15.2, align 1
  %conv16.2 = zext i8 %39 to i32
  %sub17.2 = sub nsw i32 %conv14.2, %conv16.2
  %shl18.2 = shl nsw i32 %sub17.2, 16
  %add19.2 = add nsw i32 %shl18.2, %sub12.2
  %arrayidx20.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 2
  %40 = load i8, i8* %arrayidx20.2, align 1
  %conv21.2 = zext i8 %40 to i32
  %arrayidx22.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 2
  %41 = load i8, i8* %arrayidx22.2, align 1
  %conv23.2 = zext i8 %41 to i32
  %sub24.2 = sub nsw i32 %conv21.2, %conv23.2
  %arrayidx25.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 6
  %42 = load i8, i8* %arrayidx25.2, align 1
  %conv26.2 = zext i8 %42 to i32
  %arrayidx27.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 6
  %43 = load i8, i8* %arrayidx27.2, align 1
  %conv28.2 = zext i8 %43 to i32
  %sub29.2 = sub nsw i32 %conv26.2, %conv28.2
  %shl30.2 = shl nsw i32 %sub29.2, 16
  %add31.2 = add nsw i32 %shl30.2, %sub24.2
  %arrayidx32.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 3
  %44 = load i8, i8* %arrayidx32.2, align 1
  %conv33.2 = zext i8 %44 to i32
  %arrayidx34.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 3
  %45 = load i8, i8* %arrayidx34.2, align 1
  %conv35.2 = zext i8 %45 to i32
  %sub36.2 = sub nsw i32 %conv33.2, %conv35.2
  %arrayidx37.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 7
  %46 = load i8, i8* %arrayidx37.2, align 1
  %conv38.2 = zext i8 %46 to i32
  %arrayidx39.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 7
  %47 = load i8, i8* %arrayidx39.2, align 1
  %conv40.2 = zext i8 %47 to i32
  %sub41.2 = sub nsw i32 %conv38.2, %conv40.2
  %shl42.2 = shl nsw i32 %sub41.2, 16
  %add43.2 = add nsw i32 %shl42.2, %sub36.2
  %add44.2 = add nsw i32 %add19.2, %add.2
  %sub45.2 = sub nsw i32 %add.2, %add19.2
  %add46.2 = add nsw i32 %add43.2, %add31.2
  %sub47.2 = sub nsw i32 %add31.2, %add43.2
  %add48.2 = add nsw i32 %add46.2, %add44.2
  %sub51.2 = sub nsw i32 %add44.2, %add46.2
  %add55.2 = add nsw i32 %sub47.2, %sub45.2
  %sub59.2 = sub nsw i32 %sub45.2, %sub47.2
  %add.ptr.2 = getelementptr inbounds i8, i8* %add.ptr.1, i64 %idx.ext
  %add.ptr64.2 = getelementptr inbounds i8, i8* %add.ptr64.1, i64 %idx.ext63
  %48 = load i8, i8* %add.ptr.2, align 1
  %conv.3 = zext i8 %48 to i32
  %49 = load i8, i8* %add.ptr64.2, align 1
  %conv2.3 = zext i8 %49 to i32
  %sub.3 = sub nsw i32 %conv.3, %conv2.3
  %arrayidx3.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 4
  %50 = load i8, i8* %arrayidx3.3, align 1
  %conv4.3 = zext i8 %50 to i32
  %arrayidx5.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 4
  %51 = load i8, i8* %arrayidx5.3, align 1
  %conv6.3 = zext i8 %51 to i32
  %sub7.3 = sub nsw i32 %conv4.3, %conv6.3
  %shl.3 = shl nsw i32 %sub7.3, 16
  %add.3 = add nsw i32 %shl.3, %sub.3
  %arrayidx8.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 1
  %52 = load i8, i8* %arrayidx8.3, align 1
  %conv9.3 = zext i8 %52 to i32
  %arrayidx10.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 1
  %53 = load i8, i8* %arrayidx10.3, align 1
  %conv11.3 = zext i8 %53 to i32
  %sub12.3 = sub nsw i32 %conv9.3, %conv11.3
  %arrayidx13.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 5
  %54 = load i8, i8* %arrayidx13.3, align 1
  %conv14.3 = zext i8 %54 to i32
  %arrayidx15.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 5
  %55 = load i8, i8* %arrayidx15.3, align 1
  %conv16.3 = zext i8 %55 to i32
  %sub17.3 = sub nsw i32 %conv14.3, %conv16.3
  %shl18.3 = shl nsw i32 %sub17.3, 16
  %add19.3 = add nsw i32 %shl18.3, %sub12.3
  %arrayidx20.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 2
  %56 = load i8, i8* %arrayidx20.3, align 1
  %conv21.3 = zext i8 %56 to i32
  %arrayidx22.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 2
  %57 = load i8, i8* %arrayidx22.3, align 1
  %conv23.3 = zext i8 %57 to i32
  %sub24.3 = sub nsw i32 %conv21.3, %conv23.3
  %arrayidx25.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 6
  %58 = load i8, i8* %arrayidx25.3, align 1
  %conv26.3 = zext i8 %58 to i32
  %arrayidx27.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 6
  %59 = load i8, i8* %arrayidx27.3, align 1
  %conv28.3 = zext i8 %59 to i32
  %sub29.3 = sub nsw i32 %conv26.3, %conv28.3
  %shl30.3 = shl nsw i32 %sub29.3, 16
  %add31.3 = add nsw i32 %shl30.3, %sub24.3
  %arrayidx32.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 3
  %60 = load i8, i8* %arrayidx32.3, align 1
  %conv33.3 = zext i8 %60 to i32
  %arrayidx34.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 3
  %61 = load i8, i8* %arrayidx34.3, align 1
  %conv35.3 = zext i8 %61 to i32
  %sub36.3 = sub nsw i32 %conv33.3, %conv35.3
  %arrayidx37.3 = getelementptr inbounds i8, i8* %add.ptr.2, i64 7
  %62 = load i8, i8* %arrayidx37.3, align 1
  %conv38.3 = zext i8 %62 to i32
  %arrayidx39.3 = getelementptr inbounds i8, i8* %add.ptr64.2, i64 7
  %63 = load i8, i8* %arrayidx39.3, align 1
  %conv40.3 = zext i8 %63 to i32
  %sub41.3 = sub nsw i32 %conv38.3, %conv40.3
  %shl42.3 = shl nsw i32 %sub41.3, 16
  %add43.3 = add nsw i32 %shl42.3, %sub36.3
  %add44.3 = add nsw i32 %add19.3, %add.3
  %sub45.3 = sub nsw i32 %add.3, %add19.3
  %add46.3 = add nsw i32 %add43.3, %add31.3
  %sub47.3 = sub nsw i32 %add31.3, %add43.3
  %add48.3 = add nsw i32 %add46.3, %add44.3
  %sub51.3 = sub nsw i32 %add44.3, %add46.3
  %add55.3 = add nsw i32 %sub47.3, %sub45.3
  %sub59.3 = sub nsw i32 %sub45.3, %sub47.3
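  ; Cross-row butterflies followed by the packed-abs/accumulate sequence;
  ; in the vectorized form above this is the <16 x i32> add/sub plus
  ; lshr/and/mul/xor block feeding llvm.vector.reduce.add.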
  %add78 = add nsw i32 %add48.1, %add48
  %sub86 = sub nsw i32 %add48, %add48.1
  %add94 = add nsw i32 %add48.3, %add48.2
  %sub102 = sub nsw i32 %add48.2, %add48.3
  %add103 = add nsw i32 %add94, %add78
  %sub104 = sub nsw i32 %add78, %add94
  %add105 = add nsw i32 %sub102, %sub86
  %sub106 = sub nsw i32 %sub86, %sub102
  %shr.i = lshr i32 %add103, 15
  %and.i = and i32 %shr.i, 65537
  %mul.i = mul nuw i32 %and.i, 65535
  %add.i = add i32 %mul.i, %add103
  %xor.i = xor i32 %add.i, %mul.i
  %shr.i184 = lshr i32 %add105, 15
  %and.i185 = and i32 %shr.i184, 65537
  %mul.i186 = mul nuw i32 %and.i185, 65535
  %add.i187 = add i32 %mul.i186, %add105
  %xor.i188 = xor i32 %add.i187, %mul.i186
  %shr.i189 = lshr i32 %sub104, 15
  %and.i190 = and i32 %shr.i189, 65537
  %mul.i191 = mul nuw i32 %and.i190, 65535
  %add.i192 = add i32 %mul.i191, %sub104
  %xor.i193 = xor i32 %add.i192, %mul.i191
  %shr.i194 = lshr i32 %sub106, 15
  %and.i195 = and i32 %shr.i194, 65537
  %mul.i196 = mul nuw i32 %and.i195, 65535
  %add.i197 = add i32 %mul.i196, %sub106
  %xor.i198 = xor i32 %add.i197, %mul.i196
  %add110 = add i32 %xor.i188, %xor.i
  %add112 = add i32 %add110, %xor.i193
  %add113 = add i32 %add112, %xor.i198
  %add78.1 = add nsw i32 %add55.1, %add55
  %sub86.1 = sub nsw i32 %add55, %add55.1
  %add94.1 = add nsw i32 %add55.3, %add55.2
  %sub102.1 = sub nsw i32 %add55.2, %add55.3
  %add103.1 = add nsw i32 %add94.1, %add78.1
  %sub104.1 = sub nsw i32 %add78.1, %add94.1
  %add105.1 = add nsw i32 %sub102.1, %sub86.1
  %sub106.1 = sub nsw i32 %sub86.1, %sub102.1
  %shr.i.1 = lshr i32 %add103.1, 15
  %and.i.1 = and i32 %shr.i.1, 65537
  %mul.i.1 = mul nuw i32 %and.i.1, 65535
  %add.i.1 = add i32 %mul.i.1, %add103.1
  %xor.i.1 = xor i32 %add.i.1, %mul.i.1
  %shr.i184.1 = lshr i32 %add105.1, 15
  %and.i185.1 = and i32 %shr.i184.1, 65537
  %mul.i186.1 = mul nuw i32 %and.i185.1, 65535
  %add.i187.1 = add i32 %mul.i186.1, %add105.1
  %xor.i188.1 = xor i32 %add.i187.1, %mul.i186.1
  %shr.i189.1 = lshr i32 %sub104.1, 15
  %and.i190.1 = and i32 %shr.i189.1, 65537
  %mul.i191.1 = mul nuw i32 %and.i190.1, 65535
  %add.i192.1 = add i32 %mul.i191.1, %sub104.1
  %xor.i193.1 = xor i32 %add.i192.1, %mul.i191.1
  %shr.i194.1 = lshr i32 %sub106.1, 15
  %and.i195.1 = and i32 %shr.i194.1, 65537
  %mul.i196.1 = mul nuw i32 %and.i195.1, 65535
  %add.i197.1 = add i32 %mul.i196.1, %sub106.1
  %xor.i198.1 = xor i32 %add.i197.1, %mul.i196.1
  %add108.1 = add i32 %xor.i188.1, %add113
  %add110.1 = add i32 %add108.1, %xor.i.1
  %add112.1 = add i32 %add110.1, %xor.i193.1
  %add113.1 = add i32 %add112.1, %xor.i198.1
  %add78.2 = add nsw i32 %sub51.1, %sub51
  %sub86.2 = sub nsw i32 %sub51, %sub51.1
  %add94.2 = add nsw i32 %sub51.3, %sub51.2
  %sub102.2 = sub nsw i32 %sub51.2, %sub51.3
  %add103.2 = add nsw i32 %add94.2, %add78.2
  %sub104.2 = sub nsw i32 %add78.2, %add94.2
  %add105.2 = add nsw i32 %sub102.2, %sub86.2
  %sub106.2 = sub nsw i32 %sub86.2, %sub102.2
  %shr.i.2 = lshr i32 %add103.2, 15
  %and.i.2 = and i32 %shr.i.2, 65537
  %mul.i.2 = mul nuw i32 %and.i.2, 65535
  %add.i.2 = add i32 %mul.i.2, %add103.2
  %xor.i.2 = xor i32 %add.i.2, %mul.i.2
  %shr.i184.2 = lshr i32 %add105.2, 15
  %and.i185.2 = and i32 %shr.i184.2, 65537
  %mul.i186.2 = mul nuw i32 %and.i185.2, 65535
  %add.i187.2 = add i32 %mul.i186.2, %add105.2
  %xor.i188.2 = xor i32 %add.i187.2, %mul.i186.2
  %shr.i189.2 = lshr i32 %sub104.2, 15
  %and.i190.2 = and i32 %shr.i189.2, 65537
  %mul.i191.2 = mul nuw i32 %and.i190.2, 65535
  %add.i192.2 = add i32 %mul.i191.2, %sub104.2
  %xor.i193.2 = xor i32 %add.i192.2, %mul.i191.2
  %shr.i194.2 = lshr i32 %sub106.2, 15
  %and.i195.2 = and i32 %shr.i194.2, 65537
  %mul.i196.2 = mul nuw i32 %and.i195.2, 65535
  %add.i197.2 = add i32 %mul.i196.2, %sub106.2
  %xor.i198.2 = xor i32 %add.i197.2, %mul.i196.2
  %add108.2 = add i32 %xor.i188.2, %add113.1
  %add110.2 = add i32 %add108.2, %xor.i.2
  %add112.2 = add i32 %add110.2, %xor.i193.2
  %add113.2 = add i32 %add112.2, %xor.i198.2
  %add78.3 = add nsw i32 %sub59.1, %sub59
  %sub86.3 = sub nsw i32 %sub59, %sub59.1
  %add94.3 = add nsw i32 %sub59.3, %sub59.2
  %sub102.3 = sub nsw i32 %sub59.2, %sub59.3
  %add103.3 = add nsw i32 %add94.3, %add78.3
  %sub104.3 = sub nsw i32 %add78.3, %add94.3
  %add105.3 = add nsw i32 %sub102.3, %sub86.3
  %sub106.3 = sub nsw i32 %sub86.3, %sub102.3
  %shr.i.3 = lshr i32 %add103.3, 15
  %and.i.3 = and i32 %shr.i.3, 65537
  %mul.i.3 = mul nuw i32 %and.i.3, 65535
  %add.i.3 = add i32 %mul.i.3, %add103.3
  %xor.i.3 = xor i32 %add.i.3, %mul.i.3
  %shr.i184.3 = lshr i32 %add105.3, 15
  %and.i185.3 = and i32 %shr.i184.3, 65537
  %mul.i186.3 = mul nuw i32 %and.i185.3, 65535
  %add.i187.3 = add i32 %mul.i186.3, %add105.3
  %xor.i188.3 = xor i32 %add.i187.3, %mul.i186.3
  %shr.i189.3 = lshr i32 %sub104.3, 15
  %and.i190.3 = and i32 %shr.i189.3, 65537
  %mul.i191.3 = mul nuw i32 %and.i190.3, 65535
  %add.i192.3 = add i32 %mul.i191.3, %sub104.3
  %xor.i193.3 = xor i32 %add.i192.3, %mul.i191.3
  %shr.i194.3 = lshr i32 %sub106.3, 15
  %and.i195.3 = and i32 %shr.i194.3, 65537
  %mul.i196.3 = mul nuw i32 %and.i195.3, 65535
  %add.i197.3 = add i32 %mul.i196.3, %sub106.3
  %xor.i198.3 = xor i32 %add.i197.3, %mul.i196.3
  %add108.3 = add i32 %xor.i188.3, %add113.2
  %add110.3 = add i32 %add108.3, %xor.i.3
  %add112.3 = add i32 %add110.3, %xor.i193.3
  %add113.3 = add i32 %add112.3, %xor.i198.3
  %conv118 = and i32 %add113.3, 65535
  %shr = lshr i32 %add113.3, 16
  %add119 = add nuw nsw i32 %conv118, %shr
  %shr120 = lshr i32 %add119, 1
  ret i32 %shr120
}
