1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -basic-aa -dse -S | FileCheck %s
3
4define void @write4to7(i32* nocapture %p) {
5; CHECK-LABEL: @write4to7(
6; CHECK-NEXT:  entry:
7; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
8; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
9; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
10; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false)
11; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
12; CHECK-NEXT:    store i32 1, i32* [[ARRAYIDX1]], align 4
13; CHECK-NEXT:    ret void
14;
15entry:
  ; Input: a 28-byte memset starting at %p + 4 (one i32 past %p), followed by
  ; an i32 store to the same address, i.e. over the first 4 bytes of the
  ; memset region (bytes 4..7 relative to %p).
  ; The assertions above expect the memset to start 4 bytes later with its
  ; length reduced from 28 to 24, and the store to remain.
16  %arrayidx0 = getelementptr inbounds i32, i32* %p, i64 1
17  %p3 = bitcast i32* %arrayidx0 to i8*
18  call void @llvm.memset.p0i8.i64(i8* align 4 %p3, i8 0, i64 28, i1 false)
19  %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 1
20  store i32 1, i32* %arrayidx1, align 4
21  ret void
22}
23
24define void @write4to7_atomic(i32* nocapture %p) {
25; CHECK-LABEL: @write4to7_atomic(
26; CHECK-NEXT:  entry:
27; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
28; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
29; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
30; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i32 4)
31; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[P]], i64 1
32; CHECK-NEXT:    store atomic i32 1, i32* [[ARRAYIDX1]] unordered, align 4
33; CHECK-NEXT:    ret void
34;
35entry:
  ; Same shape as @write4to7, but using the element-wise unordered-atomic
  ; memset (element size 4) and an unordered atomic i32 store.
  ; The assertions above expect the memset to start 4 bytes later with its
  ; length reduced from 28 to 24.
36  %arrayidx0 = getelementptr inbounds i32, i32* %p, i64 1
37  %p3 = bitcast i32* %arrayidx0 to i8*
38  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %p3, i8 0, i64 28, i32 4)
39  %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 1
40  store atomic i32 1, i32* %arrayidx1 unordered, align 4
41  ret void
42}
43
44define void @write0to3(i32* nocapture %p) {
45; CHECK-LABEL: @write0to3(
46; CHECK-NEXT:  entry:
47; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8*
48; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
49; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false)
50; CHECK-NEXT:    store i32 1, i32* [[P]], align 4
51; CHECK-NEXT:    ret void
52;
53entry:
  ; Input: a 28-byte memset starting at %p, followed by an i32 store to %p
  ; (bytes 0..3 of the memset region).
  ; The assertions above expect the memset to start at %p + 4 with its length
  ; reduced from 28 to 24.
54  %p3 = bitcast i32* %p to i8*
55  call void @llvm.memset.p0i8.i64(i8* align 4 %p3, i8 0, i64 28, i1 false)
56  store i32 1, i32* %p, align 4
57  ret void
58}
59
60define void @write0to3_atomic(i32* nocapture %p) {
61; CHECK-LABEL: @write0to3_atomic(
62; CHECK-NEXT:  entry:
63; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8*
64; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
65; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i32 4)
66; CHECK-NEXT:    store atomic i32 1, i32* [[P]] unordered, align 4
67; CHECK-NEXT:    ret void
68;
69entry:
  ; Same shape as @write0to3, but using the element-wise unordered-atomic
  ; memset (element size 4) and an unordered atomic i32 store.
  ; The assertions above expect the memset to start at %p + 4 with its length
  ; reduced from 28 to 24.
70  %p3 = bitcast i32* %p to i8*
71  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %p3, i8 0, i64 28, i32 4)
72  store atomic i32 1, i32* %p unordered, align 4
73  ret void
74}
75
76; Atomicity of the store is weaker than that of the memset
77define void @write0to3_atomic_weaker(i32* nocapture %p) {
78; CHECK-LABEL: @write0to3_atomic_weaker(
79; CHECK-NEXT:  entry:
80; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8*
81; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
82; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i32 4)
83; CHECK-NEXT:    store i32 1, i32* [[P]], align 4
84; CHECK-NEXT:    ret void
85;
86entry:
  ; Input: a 28-byte element-wise unordered-atomic memset (element size 4) at
  ; %p, followed by a plain (non-atomic) i32 store to %p.
  ; The assertions above expect the memset to start at %p + 4 with its length
  ; reduced from 28 to 24, just as in the fully atomic variant.
87  %p3 = bitcast i32* %p to i8*
88  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %p3, i8 0, i64 28, i32 4)
89  store i32 1, i32* %p, align 4
90  ret void
91}
92
93define void @write0to7(i32* nocapture %p) {
94; CHECK-LABEL: @write0to7(
95; CHECK-NEXT:  entry:
96; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8*
97; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 8
98; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false)
99; CHECK-NEXT:    [[P4:%.*]] = bitcast i32* [[P]] to i64*
100; CHECK-NEXT:    store i64 1, i64* [[P4]], align 8
101; CHECK-NEXT:    ret void
102;
103entry:
  ; Input: a 32-byte memset starting at %p, followed by an i64 store to %p
  ; (bytes 0..7 of the memset region).
  ; The assertions above expect the memset to start at %p + 8 with its length
  ; reduced from 32 to 24.
104  %p3 = bitcast i32* %p to i8*
105  call void @llvm.memset.p0i8.i64(i8* align 4 %p3, i8 0, i64 32, i1 false)
106  %p4 = bitcast i32* %p to i64*
107  store i64 1, i64* %p4, align 8
108  ret void
109}
110
111; Changing the memset start and length is okay here because the
112; store is a multiple of the memset element size
113define void @write0to7_atomic(i32* nocapture %p) {
114; CHECK-LABEL: @write0to7_atomic(
115; CHECK-NEXT:  entry:
116; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8*
117; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 8
118; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i32 4)
119; CHECK-NEXT:    [[P4:%.*]] = bitcast i32* [[P]] to i64*
120; CHECK-NEXT:    store atomic i64 1, i64* [[P4]] unordered, align 8
121; CHECK-NEXT:    ret void
122;
123entry:
  ; Input: a 32-byte element-wise unordered-atomic memset with element size 4
  ; at %p, followed by an 8-byte unordered atomic store to %p. The store
  ; covers two whole 4-byte memset elements.
  ; The assertions above expect the memset to start at %p + 8 with its length
  ; reduced from 32 to 24.
124  %p3 = bitcast i32* %p to i8*
125  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %p3, i8 0, i64 32, i32 4)
126  %p4 = bitcast i32* %p to i64*
127  store atomic i64 1, i64* %p4 unordered, align 8
128  ret void
129}
130
131define void @write0to7_2(i32* nocapture %p) {
132; CHECK-LABEL: @write0to7_2(
133; CHECK-NEXT:  entry:
134; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
135; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
136; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
137; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i1 false)
138; CHECK-NEXT:    [[P4:%.*]] = bitcast i32* [[P]] to i64*
139; CHECK-NEXT:    store i64 1, i64* [[P4]], align 8
140; CHECK-NEXT:    ret void
141;
142entry:
  ; Input: a 28-byte memset starting at %p + 4 (bytes 4..31 relative to %p),
  ; followed by an i64 store to %p (bytes 0..7). The store begins before the
  ; memset and overlaps only its first 4 bytes (bytes 4..7).
  ; The assertions above expect the memset to start 4 bytes later with its
  ; length reduced from 28 to 24.
143  %arrayidx0 = getelementptr inbounds i32, i32* %p, i64 1
144  %p3 = bitcast i32* %arrayidx0 to i8*
145  call void @llvm.memset.p0i8.i64(i8* align 4 %p3, i8 0, i64 28, i1 false)
146  %p4 = bitcast i32* %p to i64*
147  store i64 1, i64* %p4, align 8
148  ret void
149}
150
151define void @write0to7_2_atomic(i32* nocapture %p) {
152; CHECK-LABEL: @write0to7_2_atomic(
153; CHECK-NEXT:  entry:
154; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
155; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
156; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
157; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 24, i32 4)
158; CHECK-NEXT:    [[P4:%.*]] = bitcast i32* [[P]] to i64*
159; CHECK-NEXT:    store atomic i64 1, i64* [[P4]] unordered, align 8
160; CHECK-NEXT:    ret void
161;
162entry:
  ; Same shape as @write0to7_2, but using the element-wise unordered-atomic
  ; memset (element size 4) and an unordered atomic i64 store. The store
  ; overlaps only the first 4 bytes of the memset region.
  ; The assertions above expect the memset to start 4 bytes later with its
  ; length reduced from 28 to 24.
163  %arrayidx0 = getelementptr inbounds i32, i32* %p, i64 1
164  %p3 = bitcast i32* %arrayidx0 to i8*
165  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %p3, i8 0, i64 28, i32 4)
166  %p4 = bitcast i32* %p to i64*
167  store atomic i64 1, i64* %p4 unordered, align 8
168  ret void
169}
170
171; We do not trim the beginning of the earlier write if the alignment of the
172; start pointer would change.
173define void @dontwrite0to3_align8(i32* nocapture %p) {
174; CHECK-LABEL: @dontwrite0to3_align8(
175; CHECK-NEXT:  entry:
176; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8*
177; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[P3]], i8 0, i64 32, i1 false)
178; CHECK-NEXT:    store i32 1, i32* [[P]], align 4
179; CHECK-NEXT:    ret void
180;
181entry:
  ; Input: a 32-byte memset at %p whose destination is marked align 8,
  ; followed by an i32 store to %p (bytes 0..3).
  ; The assertions above expect the memset to be left exactly as written
  ; (same start pointer, length 32): a start moved by 4 bytes would not keep
  ; the declared 8-byte alignment.
182  %p3 = bitcast i32* %p to i8*
183  call void @llvm.memset.p0i8.i64(i8* align 8 %p3, i8 0, i64 32, i1 false)
184  store i32 1, i32* %p, align 4
185  ret void
186}
187
188define void @dontwrite0to3_align8_atomic(i32* nocapture %p) {
189; CHECK-LABEL: @dontwrite0to3_align8_atomic(
190; CHECK-NEXT:  entry:
191; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8*
192; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 [[P3]], i8 0, i64 32, i32 4)
193; CHECK-NEXT:    store atomic i32 1, i32* [[P]] unordered, align 4
194; CHECK-NEXT:    ret void
195;
196entry:
  ; Same shape as @dontwrite0to3_align8, but using the element-wise
  ; unordered-atomic memset (element size 4, destination align 8) and an
  ; unordered atomic i32 store.
  ; The assertions above expect the memset to be left exactly as written
  ; (same start pointer, length 32).
197  %p3 = bitcast i32* %p to i8*
198  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 %p3, i8 0, i64 32, i32 4)
199  store atomic i32 1, i32* %p unordered, align 4
200  ret void
201}
202
203define void @dontwrite0to1(i32* nocapture %p) {
204; CHECK-LABEL: @dontwrite0to1(
205; CHECK-NEXT:  entry:
206; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8*
207; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 32, i1 false)
208; CHECK-NEXT:    [[P4:%.*]] = bitcast i32* [[P]] to i16*
209; CHECK-NEXT:    store i16 1, i16* [[P4]], align 4
210; CHECK-NEXT:    ret void
211;
212entry:
  ; Input: a 32-byte memset at %p with destination align 4, followed by an
  ; i16 store to %p (bytes 0..1 only).
  ; The assertions above expect the memset to be left exactly as written
  ; (same start pointer, length 32).
  ; NOTE(review): presumably this is because moving the start by 2 bytes
  ; would not preserve the 4-byte alignment -- confirm against the pass.
213  %p3 = bitcast i32* %p to i8*
214  call void @llvm.memset.p0i8.i64(i8* align 4 %p3, i8 0, i64 32, i1 false)
215  %p4 = bitcast i32* %p to i16*
216  store i16 1, i16* %p4, align 4
217  ret void
218}
219
220define void @dontwrite0to1_atomic(i32* nocapture %p) {
221; CHECK-LABEL: @dontwrite0to1_atomic(
222; CHECK-NEXT:  entry:
223; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[P:%.*]] to i8*
224; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[P3]], i8 0, i64 32, i32 4)
225; CHECK-NEXT:    [[P4:%.*]] = bitcast i32* [[P]] to i16*
226; CHECK-NEXT:    store atomic i16 1, i16* [[P4]] unordered, align 4
227; CHECK-NEXT:    ret void
228;
229entry:
  ; Input: a 32-byte element-wise unordered-atomic memset (element size 4,
  ; destination align 4) at %p, followed by a 2-byte unordered atomic store
  ; to %p. The store is smaller than one memset element.
  ; The assertions above expect the memset to be left exactly as written
  ; (same start pointer, length 32).
230  %p3 = bitcast i32* %p to i8*
231  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %p3, i8 0, i64 32, i32 4)
232  %p4 = bitcast i32* %p to i16*
233  store atomic i16 1, i16* %p4 unordered, align 4
234  ret void
235}
236
237define void @write2to10(i32* nocapture %p) {
238; CHECK-LABEL: @write2to10(
239; CHECK-NEXT:  entry:
240; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
241; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
242; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
243; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 28, i1 false)
244; CHECK-NEXT:    [[P4:%.*]] = bitcast i32* [[P]] to i16*
245; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[P4]], i64 1
246; CHECK-NEXT:    [[P5:%.*]] = bitcast i16* [[ARRAYIDX2]] to i64*
247; CHECK-NEXT:    store i64 1, i64* [[P5]], align 8
248; CHECK-NEXT:    ret void
249;
250entry:
  ; Input: a 32-byte memset starting at %p + 4 (bytes 4..35 relative to %p),
  ; followed by an i64 store to %p + 2 (bytes 2..9). The store overlaps the
  ; first 6 bytes of the memset region (bytes 4..9).
  ; The assertions above expect the memset start to move by only 4 bytes, not
  ; 6, with the length reduced from 32 to 28; the memset keeps align 4.
251  %arrayidx0 = getelementptr inbounds i32, i32* %p, i64 1
252  %p3 = bitcast i32* %arrayidx0 to i8*
253  call void @llvm.memset.p0i8.i64(i8* align 4 %p3, i8 0, i64 32, i1 false)
254  %p4 = bitcast i32* %p to i16*
255  %arrayidx2 = getelementptr inbounds i16, i16* %p4, i64 1
256  %p5 = bitcast i16* %arrayidx2 to i64*
257  store i64 1, i64* %p5, align 8
258  ret void
259}
260
261define void @write2to10_atomic(i32* nocapture %p) {
262; CHECK-LABEL: @write2to10_atomic(
263; CHECK-NEXT:  entry:
264; CHECK-NEXT:    [[ARRAYIDX0:%.*]] = getelementptr inbounds i32, i32* [[P:%.*]], i64 1
265; CHECK-NEXT:    [[P3:%.*]] = bitcast i32* [[ARRAYIDX0]] to i8*
266; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P3]], i64 4
267; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 28, i32 4)
268; CHECK-NEXT:    [[P4:%.*]] = bitcast i32* [[P]] to i16*
269; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[P4]], i64 1
270; CHECK-NEXT:    [[P5:%.*]] = bitcast i16* [[ARRAYIDX2]] to i64*
271; CHECK-NEXT:    store atomic i64 1, i64* [[P5]] unordered, align 8
272; CHECK-NEXT:    ret void
273;
274entry:
  ; Same shape as @write2to10, but using the element-wise unordered-atomic
  ; memset (element size 4) and an unordered atomic i64 store. The store
  ; overlaps the first 6 bytes of the memset region.
  ; The assertions above expect the memset start to move by only 4 bytes (one
  ; whole element), with the length reduced from 32 to 28.
275  %arrayidx0 = getelementptr inbounds i32, i32* %p, i64 1
276  %p3 = bitcast i32* %arrayidx0 to i8*
277  call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 4 %p3, i8 0, i64 32, i32 4)
278  %p4 = bitcast i32* %p to i16*
279  %arrayidx2 = getelementptr inbounds i16, i16* %p4, i64 1
280  %p5 = bitcast i16* %arrayidx2 to i64*
281  store atomic i64 1, i64* %p5 unordered, align 8
282  ret void
283}
284
285define void @write8To15AndThen0To7(i64* nocapture %P) {
286; CHECK-LABEL: @write8To15AndThen0To7(
287; CHECK-NEXT:  entry:
288; CHECK-NEXT:    [[BASE0:%.*]] = bitcast i64* [[P:%.*]] to i8*
289; CHECK-NEXT:    [[MYBASE0:%.*]] = getelementptr inbounds i8, i8* [[BASE0]], i64 0
290; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[MYBASE0]], i64 16
291; CHECK-NEXT:    tail call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP0]], i8 0, i64 16, i1 false)
292; CHECK-NEXT:    [[BASE64_0:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 0
293; CHECK-NEXT:    [[BASE64_1:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 1
294; CHECK-NEXT:    store i64 1, i64* [[BASE64_1]]
295; CHECK-NEXT:    store i64 2, i64* [[BASE64_0]]
296; CHECK-NEXT:    ret void
297;
298entry:
  ; Input: a 32-byte memset at %P (align 8), followed by two i64 stores:
  ; first to bytes 8..15, then to bytes 0..7. Together the stores cover the
  ; first 16 bytes of the memset region, written out of address order.
  ; The assertions above expect the memset to start at %P + 16 with its
  ; length reduced from 32 to 16.
299
300  %base0 = bitcast i64* %P to i8*
301  %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0
302  tail call void @llvm.memset.p0i8.i64(i8* align 8 %mybase0, i8 0, i64 32, i1 false)
303
304  %base64_0 = getelementptr inbounds i64, i64* %P, i64 0
305  %base64_1 = getelementptr inbounds i64, i64* %P, i64 1
306
307  store i64 1, i64* %base64_1
308  store i64 2, i64* %base64_0
309  ret void
310}
311
312define void @write8To15AndThen0To7_atomic(i64* nocapture %P) {
313; CHECK-LABEL: @write8To15AndThen0To7_atomic(
314; CHECK-NEXT:  entry:
315; CHECK-NEXT:    [[BASE0:%.*]] = bitcast i64* [[P:%.*]] to i8*
316; CHECK-NEXT:    [[MYBASE0:%.*]] = getelementptr inbounds i8, i8* [[BASE0]], i64 0
317; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[MYBASE0]], i64 16
318; CHECK-NEXT:    tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 [[TMP0]], i8 0, i64 16, i32 8)
319; CHECK-NEXT:    [[BASE64_0:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 0
320; CHECK-NEXT:    [[BASE64_1:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 1
321; CHECK-NEXT:    store atomic i64 1, i64* [[BASE64_1]] unordered, align 8
322; CHECK-NEXT:    store atomic i64 2, i64* [[BASE64_0]] unordered, align 8
323; CHECK-NEXT:    ret void
324;
325entry:
  ; Input: a 32-byte element-wise unordered-atomic memset (element size 8,
  ; align 8) at %P, followed by two unordered atomic i64 stores: first to
  ; bytes 8..15, then to bytes 0..7.
  ; The assertions above expect the memset to start at %P + 16 with its
  ; length reduced from 32 to 16.
326
327  %base0 = bitcast i64* %P to i8*
328  %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0
329  tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 %mybase0, i8 0, i64 32, i32 8)
330
331  %base64_0 = getelementptr inbounds i64, i64* %P, i64 0
332  %base64_1 = getelementptr inbounds i64, i64* %P, i64 1
333
334  store atomic i64 1, i64* %base64_1 unordered, align 8
335  store atomic i64 2, i64* %base64_0 unordered, align 8
336  ret void
337}
338
339define void @write8To15AndThen0To7_atomic_weaker(i64* nocapture %P) {
340; CHECK-LABEL: @write8To15AndThen0To7_atomic_weaker(
341; CHECK-NEXT:  entry:
342; CHECK-NEXT:    [[BASE0:%.*]] = bitcast i64* [[P:%.*]] to i8*
343; CHECK-NEXT:    [[MYBASE0:%.*]] = getelementptr inbounds i8, i8* [[BASE0]], i64 0
344; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[MYBASE0]], i64 16
345; CHECK-NEXT:    tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 [[TMP0]], i8 0, i64 16, i32 8)
346; CHECK-NEXT:    [[BASE64_0:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 0
347; CHECK-NEXT:    [[BASE64_1:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 1
348; CHECK-NEXT:    store atomic i64 1, i64* [[BASE64_1]] unordered, align 8
349; CHECK-NEXT:    store i64 2, i64* [[BASE64_0]], align 8
350; CHECK-NEXT:    ret void
351;
352entry:
  ; Variant of @write8To15AndThen0To7_atomic in which the second store (to
  ; bytes 0..7) is a plain non-atomic store, while the first store (to bytes
  ; 8..15) stays unordered atomic.
  ; The assertions above expect the same result: the memset starts at
  ; %P + 16 with its length reduced from 32 to 16.
353
354  %base0 = bitcast i64* %P to i8*
355  %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0
356  tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 %mybase0, i8 0, i64 32, i32 8)
357
358  %base64_0 = getelementptr inbounds i64, i64* %P, i64 0
359  %base64_1 = getelementptr inbounds i64, i64* %P, i64 1
360
361  store atomic i64 1, i64* %base64_1 unordered, align 8
362  store i64 2, i64* %base64_0, align 8
363  ret void
364}
365
366define void @write8To15AndThen0To7_atomic_weaker_2(i64* nocapture %P) {
367; CHECK-LABEL: @write8To15AndThen0To7_atomic_weaker_2(
368; CHECK-NEXT:  entry:
369; CHECK-NEXT:    [[BASE0:%.*]] = bitcast i64* [[P:%.*]] to i8*
370; CHECK-NEXT:    [[MYBASE0:%.*]] = getelementptr inbounds i8, i8* [[BASE0]], i64 0
371; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[MYBASE0]], i64 16
372; CHECK-NEXT:    tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 [[TMP0]], i8 0, i64 16, i32 8)
373; CHECK-NEXT:    [[BASE64_0:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 0
374; CHECK-NEXT:    [[BASE64_1:%.*]] = getelementptr inbounds i64, i64* [[P]], i64 1
375; CHECK-NEXT:    store i64 1, i64* [[BASE64_1]], align 8
376; CHECK-NEXT:    store atomic i64 2, i64* [[BASE64_0]] unordered, align 8
377; CHECK-NEXT:    ret void
378;
379entry:
  ; Variant of @write8To15AndThen0To7_atomic in which the first store (to
  ; bytes 8..15) is a plain non-atomic store, while the second store (to
  ; bytes 0..7) stays unordered atomic.
  ; The assertions above expect the same result: the memset starts at
  ; %P + 16 with its length reduced from 32 to 16.
380
381  %base0 = bitcast i64* %P to i8*
382  %mybase0 = getelementptr inbounds i8, i8* %base0, i64 0
383  tail call void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* align 8 %mybase0, i8 0, i64 32, i32 8)
384
385  %base64_0 = getelementptr inbounds i64, i64* %P, i64 0
386  %base64_1 = getelementptr inbounds i64, i64* %P, i64 1
387
388  store i64 1, i64* %base64_1, align 8
389  store atomic i64 2, i64* %base64_0 unordered, align 8
390  ret void
391}
392
; Declarations of the two memset intrinsics called by the tests in this file:
; the plain memset (last operand is an i1) and the element-wise
; unordered-atomic memset (last operand is an i32; the tests pass 4 or 8).
393declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind
394declare void @llvm.memset.element.unordered.atomic.p0i8.i64(i8* nocapture, i8, i64, i32) nounwind
395
396define void @ow_begin_align1(i8* nocapture %p) {
397; CHECK-LABEL: @ow_begin_align1(
398; CHECK-NEXT:  entry:
399; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i64 1
400; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P1]], i64 7
401; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 1 [[TMP0]], i8 0, i64 25, i1 false)
402; CHECK-NEXT:    [[P2:%.*]] = bitcast i8* [[P]] to i64*
403; CHECK-NEXT:    store i64 1, i64* [[P2]], align 1
404; CHECK-NEXT:    ret void
405;
406entry:
  ; Input: a 32-byte memset with destination align 1 starting at %p + 1
  ; (bytes 1..32 relative to %p), followed by an i64 store to %p (bytes
  ; 0..7). The store overlaps the first 7 bytes of the memset region.
  ; The assertions above expect the memset start to move by all 7 bytes, with
  ; the length reduced from 32 to 25.
407  %p1 = getelementptr inbounds i8, i8* %p, i64 1
408  call void @llvm.memset.p0i8.i64(i8* align 1 %p1, i8 0, i64 32, i1 false)
409  %p2 = bitcast i8* %p to i64*
410  store i64 1, i64* %p2, align 1
411  ret void
412}
413
414define void @ow_end_align4(i8* nocapture %p) {
415; CHECK-LABEL: @ow_end_align4(
416; CHECK-NEXT:  entry:
417; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i64 1
418; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, i8* [[P1]], i64 4
419; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP0]], i8 0, i64 28, i1 false)
420; CHECK-NEXT:    [[P2:%.*]] = bitcast i8* [[P]] to i64*
421; CHECK-NEXT:    store i64 1, i64* [[P2]], align 1
422; CHECK-NEXT:    ret void
423;
424entry:
  ; Input: same as @ow_begin_align1 except the memset destination is marked
  ; align 4. The i64 store to %p overlaps the first 7 bytes of the memset
  ; region, which starts at %p + 1.
  ; The assertions above expect the memset start to move by only 4 bytes (the
  ; memset keeps align 4), with the length reduced from 32 to 28.
  ; Note: despite "end" in the function name, it is the beginning of the
  ; memset that is expected to move.
425  %p1 = getelementptr inbounds i8, i8* %p, i64 1
426  call void @llvm.memset.p0i8.i64(i8* align 4 %p1, i8 0, i64 32, i1 false)
427  %p2 = bitcast i8* %p to i64*
428  store i64 1, i64* %p2, align 1
429  ret void
430}
431
432define void @ow_end_align8(i8* nocapture %p) {
433; CHECK-LABEL: @ow_end_align8(
434; CHECK-NEXT:  entry:
435; CHECK-NEXT:    [[P1:%.*]] = getelementptr inbounds i8, i8* [[P:%.*]], i64 1
436; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 8 [[P1]], i8 0, i64 32, i1 false)
437; CHECK-NEXT:    [[P2:%.*]] = bitcast i8* [[P]] to i64*
438; CHECK-NEXT:    store i64 1, i64* [[P2]], align 1
439; CHECK-NEXT:    ret void
440;
441entry:
  ; Input: same as @ow_begin_align1 except the memset destination is marked
  ; align 8. The i64 store to %p overlaps only the first 7 bytes of the
  ; memset region, which is less than the 8-byte alignment.
  ; The assertions above expect the memset to be left exactly as written
  ; (same start pointer, length 32).
442  %p1 = getelementptr inbounds i8, i8* %p, i64 1
443  call void @llvm.memset.p0i8.i64(i8* align 8 %p1, i8 0, i64 32, i1 false)
444  %p2 = bitcast i8* %p to i64*
445  store i64 1, i64* %p2, align 1
446  ret void
447}
448