; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=x86-64 -verify-machineinstrs | FileCheck %s -check-prefix=CHECK
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=x86-64 --x86-disable-avoid-SFB -verify-machineinstrs | FileCheck %s --check-prefix=DISABLED
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core-avx2 -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-AVX2
; RUN: llc < %s -mtriple=x86_64-linux -mcpu=skx -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-AVX512

; Overview: the tests visible in this file store into part of a struct and
; then copy the whole struct with llvm.memcpy. The llc invocations above
; differ in target CPU (x86-64, core-avx2, skx) and in whether
; --x86-disable-avoid-SFB is passed. Under the DISABLED prefix each 16-byte
; chunk is expected to stay a single vector load/store pair; under the other
; prefixes the chunk that overlaps an earlier store is expected to be broken
; into smaller copies.
; NOTE(review): "SFB" presumably stands for store-forwarding block - confirm
; against the X86 backend sources.

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

%struct.S = type { i32, i32, i32, i32 }

; Function Attrs: nounwind uwtable
; A 4-byte store to offset 4 of %s1 happens only on the %if.then path, and
; the join block then copies 16 bytes from %s1 to %s2. Expected without the
; disable flag: the %s1 copy is emitted as 4 + 4 + 8 byte pieces (offsets 0,
; 4, 8). Expected with --x86-disable-avoid-SFB: one 16-byte movups pair.
; The independent %s4 -> %s3 copy stays a single 16-byte copy everywhere.
define void @test_conditional_block(ptr nocapture noalias %s1 , ptr nocapture noalias %s2, i32 %x, ptr nocapture noalias  %s3, ptr nocapture noalias readonly %s4) local_unnamed_addr #0 {
; CHECK-LABEL: test_conditional_block:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cmpl $18, %edx
; CHECK-NEXT:    jl .LBB0_2
; CHECK-NEXT:  # %bb.1: # %if.then
; CHECK-NEXT:    movl %edx, 4(%rdi)
; CHECK-NEXT:  .LBB0_2: # %if.end
; CHECK-NEXT:    movups (%r8), %xmm0
; CHECK-NEXT:    movups %xmm0, (%rcx)
; CHECK-NEXT:    movl (%rdi), %eax
; CHECK-NEXT:    movl %eax, (%rsi)
; CHECK-NEXT:    movl 4(%rdi), %eax
; CHECK-NEXT:    movl %eax, 4(%rsi)
; CHECK-NEXT:    movq 8(%rdi), %rax
; CHECK-NEXT:    movq %rax, 8(%rsi)
; CHECK-NEXT:    retq
;
; DISABLED-LABEL: test_conditional_block:
; DISABLED:       # %bb.0: # %entry
; DISABLED-NEXT:    cmpl $18, %edx
; DISABLED-NEXT:    jl .LBB0_2
; DISABLED-NEXT:  # %bb.1: # %if.then
; DISABLED-NEXT:    movl %edx, 4(%rdi)
; DISABLED-NEXT:  .LBB0_2: # %if.end
; DISABLED-NEXT:    movups (%r8), %xmm0
; DISABLED-NEXT:    movups %xmm0, (%rcx)
; DISABLED-NEXT:    movups (%rdi), %xmm0
; DISABLED-NEXT:    movups %xmm0, (%rsi)
; DISABLED-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_conditional_block:
; CHECK-AVX2:       # %bb.0: # %entry
; CHECK-AVX2-NEXT:    cmpl $18, %edx
; CHECK-AVX2-NEXT:    jl .LBB0_2
; CHECK-AVX2-NEXT:  # %bb.1: # %if.then
; CHECK-AVX2-NEXT:    movl %edx, 4(%rdi)
; CHECK-AVX2-NEXT:  .LBB0_2: # %if.end
; CHECK-AVX2-NEXT:    vmovups (%r8), %xmm0
; CHECK-AVX2-NEXT:    vmovups %xmm0, (%rcx)
; CHECK-AVX2-NEXT:    movl (%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, (%rsi)
; CHECK-AVX2-NEXT:    movl 4(%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, 4(%rsi)
; CHECK-AVX2-NEXT:    movq 8(%rdi), %rax
; CHECK-AVX2-NEXT:    movq %rax, 8(%rsi)
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512-LABEL: test_conditional_block:
; CHECK-AVX512:       # %bb.0: # %entry
; CHECK-AVX512-NEXT:    cmpl $18, %edx
; CHECK-AVX512-NEXT:    jl .LBB0_2
; CHECK-AVX512-NEXT:  # %bb.1: # %if.then
; CHECK-AVX512-NEXT:    movl %edx, 4(%rdi)
; CHECK-AVX512-NEXT:  .LBB0_2: # %if.end
; CHECK-AVX512-NEXT:    vmovups (%r8), %xmm0
; CHECK-AVX512-NEXT:    vmovups %xmm0, (%rcx)
; CHECK-AVX512-NEXT:    movl (%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, (%rsi)
; CHECK-AVX512-NEXT:    movl 4(%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, 4(%rsi)
; CHECK-AVX512-NEXT:    movq 8(%rdi), %rax
; CHECK-AVX512-NEXT:    movq %rax, 8(%rsi)
; CHECK-AVX512-NEXT:    retq
entry:
  %cmp = icmp sgt i32 %x, 17
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  %b = getelementptr inbounds %struct.S, ptr %s1, i64 0, i32 1
  store i32 %x, ptr %b, align 4
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  tail call void @llvm.memcpy.p0.p0.i64(ptr %s3, ptr %s4, i64 16, i32 4, i1 false)
  tail call void @llvm.memcpy.p0.p0.i64(ptr %s2, ptr %s1, i64 16, i32 4, i1 false)
  ret void
}

; Function Attrs: nounwind uwtable
; An immediate 4-byte store to offset 0 of %s1 is followed, in the same
; block, by a 16-byte copy from %s1 to %s2. Expected without the disable
; flag: the copy is emitted as 4 + 8 + 4 byte pieces (offsets 0, 4, 12).
; Expected with --x86-disable-avoid-SFB: one 16-byte movups pair.
define void @test_imm_store(ptr nocapture noalias %s1, ptr nocapture %s2, i32 %x, ptr nocapture %s3) local_unnamed_addr #0 {
; CHECK-LABEL: test_imm_store:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl $0, (%rdi)
; CHECK-NEXT:    movl $1, (%rcx)
; CHECK-NEXT:    movl (%rdi), %eax
; CHECK-NEXT:    movl %eax, (%rsi)
; CHECK-NEXT:    movq 4(%rdi), %rax
; CHECK-NEXT:    movq %rax, 4(%rsi)
; CHECK-NEXT:    movl 12(%rdi), %eax
; CHECK-NEXT:    movl %eax, 12(%rsi)
; CHECK-NEXT:    retq
;
; DISABLED-LABEL: test_imm_store:
; DISABLED:       # %bb.0: # %entry
; DISABLED-NEXT:    movl $0, (%rdi)
; DISABLED-NEXT:    movl $1, (%rcx)
; DISABLED-NEXT:    movups (%rdi), %xmm0
; DISABLED-NEXT:    movups %xmm0, (%rsi)
; DISABLED-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_imm_store:
; CHECK-AVX2:       # %bb.0: # %entry
; CHECK-AVX2-NEXT:    movl $0, (%rdi)
; CHECK-AVX2-NEXT:    movl $1, (%rcx)
; CHECK-AVX2-NEXT:    movl (%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, (%rsi)
; CHECK-AVX2-NEXT:    movq 4(%rdi), %rax
; CHECK-AVX2-NEXT:    movq %rax, 4(%rsi)
; CHECK-AVX2-NEXT:    movl 12(%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, 12(%rsi)
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512-LABEL: test_imm_store:
; CHECK-AVX512:       # %bb.0: # %entry
; CHECK-AVX512-NEXT:    movl $0, (%rdi)
; CHECK-AVX512-NEXT:    movl $1, (%rcx)
; CHECK-AVX512-NEXT:    movl (%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, (%rsi)
; CHECK-AVX512-NEXT:    movq 4(%rdi), %rax
; CHECK-AVX512-NEXT:    movq %rax, 4(%rsi)
; CHECK-AVX512-NEXT:    movl 12(%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, 12(%rsi)
; CHECK-AVX512-NEXT:    retq
entry:
  store i32 0, ptr %s1, align 4
  store i32 1, ptr %s3, align 4
  tail call void @llvm.memcpy.p0.p0.i64(ptr %s2, ptr %s1, i64 16, i32 4, i1 false)
  ret void
}

; Function Attrs: nounwind uwtable
; Two conditional 4-byte stores into %s1: offset 4 in %if.then and offset 12
; in %if.then2. Only %if.then2 and %if.end are predecessors of the copy
; block %if.end3. Expected without the disable flag: the 16-byte %s1 copy is
; emitted as 8 + 4 + 4 byte pieces (offsets 0, 8, 12), i.e. the offset-4
; store does not get its own piece. Expected with --x86-disable-avoid-SFB:
; one 16-byte movups pair.
define void @test_nondirect_br(ptr nocapture noalias %s1, ptr nocapture %s2, i32 %x, ptr nocapture %s3, ptr nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 {
; CHECK-LABEL: test_nondirect_br:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cmpl $18, %edx
; CHECK-NEXT:    jl .LBB2_2
; CHECK-NEXT:  # %bb.1: # %if.then
; CHECK-NEXT:    movl %edx, 4(%rdi)
; CHECK-NEXT:  .LBB2_2: # %if.end
; CHECK-NEXT:    cmpl $14, %r9d
; CHECK-NEXT:    jl .LBB2_4
; CHECK-NEXT:  # %bb.3: # %if.then2
; CHECK-NEXT:    movl %r9d, 12(%rdi)
; CHECK-NEXT:  .LBB2_4: # %if.end3
; CHECK-NEXT:    movups (%r8), %xmm0
; CHECK-NEXT:    movups %xmm0, (%rcx)
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq %rax, (%rsi)
; CHECK-NEXT:    movl 8(%rdi), %eax
; CHECK-NEXT:    movl %eax, 8(%rsi)
; CHECK-NEXT:    movl 12(%rdi), %eax
; CHECK-NEXT:    movl %eax, 12(%rsi)
; CHECK-NEXT:    retq
;
; DISABLED-LABEL: test_nondirect_br:
; DISABLED:       # %bb.0: # %entry
; DISABLED-NEXT:    cmpl $18, %edx
; DISABLED-NEXT:    jl .LBB2_2
; DISABLED-NEXT:  # %bb.1: # %if.then
; DISABLED-NEXT:    movl %edx, 4(%rdi)
; DISABLED-NEXT:  .LBB2_2: # %if.end
; DISABLED-NEXT:    cmpl $14, %r9d
; DISABLED-NEXT:    jl .LBB2_4
; DISABLED-NEXT:  # %bb.3: # %if.then2
; DISABLED-NEXT:    movl %r9d, 12(%rdi)
; DISABLED-NEXT:  .LBB2_4: # %if.end3
; DISABLED-NEXT:    movups (%r8), %xmm0
; DISABLED-NEXT:    movups %xmm0, (%rcx)
; DISABLED-NEXT:    movups (%rdi), %xmm0
; DISABLED-NEXT:    movups %xmm0, (%rsi)
; DISABLED-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_nondirect_br:
; CHECK-AVX2:       # %bb.0: # %entry
; CHECK-AVX2-NEXT:    cmpl $18, %edx
; CHECK-AVX2-NEXT:    jl .LBB2_2
; CHECK-AVX2-NEXT:  # %bb.1: # %if.then
; CHECK-AVX2-NEXT:    movl %edx, 4(%rdi)
; CHECK-AVX2-NEXT:  .LBB2_2: # %if.end
; CHECK-AVX2-NEXT:    cmpl $14, %r9d
; CHECK-AVX2-NEXT:    jl .LBB2_4
; CHECK-AVX2-NEXT:  # %bb.3: # %if.then2
; CHECK-AVX2-NEXT:    movl %r9d, 12(%rdi)
; CHECK-AVX2-NEXT:  .LBB2_4: # %if.end3
; CHECK-AVX2-NEXT:    vmovups (%r8), %xmm0
; CHECK-AVX2-NEXT:    vmovups %xmm0, (%rcx)
; CHECK-AVX2-NEXT:    movq (%rdi), %rax
; CHECK-AVX2-NEXT:    movq %rax, (%rsi)
; CHECK-AVX2-NEXT:    movl 8(%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, 8(%rsi)
; CHECK-AVX2-NEXT:    movl 12(%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, 12(%rsi)
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512-LABEL: test_nondirect_br:
; CHECK-AVX512:       # %bb.0: # %entry
; CHECK-AVX512-NEXT:    cmpl $18, %edx
; CHECK-AVX512-NEXT:    jl .LBB2_2
; CHECK-AVX512-NEXT:  # %bb.1: # %if.then
; CHECK-AVX512-NEXT:    movl %edx, 4(%rdi)
; CHECK-AVX512-NEXT:  .LBB2_2: # %if.end
; CHECK-AVX512-NEXT:    cmpl $14, %r9d
; CHECK-AVX512-NEXT:    jl .LBB2_4
; CHECK-AVX512-NEXT:  # %bb.3: # %if.then2
; CHECK-AVX512-NEXT:    movl %r9d, 12(%rdi)
; CHECK-AVX512-NEXT:  .LBB2_4: # %if.end3
; CHECK-AVX512-NEXT:    vmovups (%r8), %xmm0
; CHECK-AVX512-NEXT:    vmovups %xmm0, (%rcx)
; CHECK-AVX512-NEXT:    movq (%rdi), %rax
; CHECK-AVX512-NEXT:    movq %rax, (%rsi)
; CHECK-AVX512-NEXT:    movl 8(%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, 8(%rsi)
; CHECK-AVX512-NEXT:    movl 12(%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, 12(%rsi)
; CHECK-AVX512-NEXT:    retq
entry:
  %cmp = icmp sgt i32 %x, 17
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  %b = getelementptr inbounds %struct.S, ptr %s1, i64 0, i32 1
  store i32 %x, ptr %b, align 4
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  %cmp1 = icmp sgt i32 %x2, 13
  br i1 %cmp1, label %if.then2, label %if.end3

if.then2:                                         ; preds = %if.end
  %d = getelementptr inbounds %struct.S, ptr %s1, i64 0, i32 3
  store i32 %x2, ptr %d, align 4
  br label %if.end3

if.end3:                                          ; preds = %if.then2, %if.end
  tail call void @llvm.memcpy.p0.p0.i64(ptr %s3, ptr %s4, i64 16, i32 4, i1 false)
  tail call void @llvm.memcpy.p0.p0.i64(ptr %s2, ptr %s1, i64 16, i32 4, i1 false)
  ret void
}

; Function Attrs: nounwind uwtable
; An unconditional 4-byte store to offset 12 of %s1 in %entry plus a
; conditional 4-byte store to offset 4 in %if.then; both blocks are
; predecessors of the copy block %if.end. Expected without the disable flag:
; the 16-byte %s1 copy is emitted as four 4-byte pieces (offsets 0, 4, 8,
; 12). Expected with --x86-disable-avoid-SFB: one 16-byte movups pair.
define void @test_2preds_block(ptr nocapture noalias %s1, ptr nocapture %s2, i32 %x, ptr nocapture %s3, ptr nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 {
; CHECK-LABEL: test_2preds_block:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl %r9d, 12(%rdi)
; CHECK-NEXT:    cmpl $18, %edx
; CHECK-NEXT:    jl .LBB3_2
; CHECK-NEXT:  # %bb.1: # %if.then
; CHECK-NEXT:    movl %edx, 4(%rdi)
; CHECK-NEXT:  .LBB3_2: # %if.end
; CHECK-NEXT:    movups (%r8), %xmm0
; CHECK-NEXT:    movups %xmm0, (%rcx)
; CHECK-NEXT:    movl (%rdi), %eax
; CHECK-NEXT:    movl %eax, (%rsi)
; CHECK-NEXT:    movl 4(%rdi), %eax
; CHECK-NEXT:    movl %eax, 4(%rsi)
; CHECK-NEXT:    movl 8(%rdi), %eax
; CHECK-NEXT:    movl %eax, 8(%rsi)
; CHECK-NEXT:    movl 12(%rdi), %eax
; CHECK-NEXT:    movl %eax, 12(%rsi)
; CHECK-NEXT:    retq
;
; DISABLED-LABEL: test_2preds_block:
; DISABLED:       # %bb.0: # %entry
; DISABLED-NEXT:    movl %r9d, 12(%rdi)
; DISABLED-NEXT:    cmpl $18, %edx
; DISABLED-NEXT:    jl .LBB3_2
; DISABLED-NEXT:  # %bb.1: # %if.then
; DISABLED-NEXT:    movl %edx, 4(%rdi)
; DISABLED-NEXT:  .LBB3_2: # %if.end
; DISABLED-NEXT:    movups (%r8), %xmm0
; DISABLED-NEXT:    movups %xmm0, (%rcx)
; DISABLED-NEXT:    movups (%rdi), %xmm0
; DISABLED-NEXT:    movups %xmm0, (%rsi)
; DISABLED-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_2preds_block:
; CHECK-AVX2:       # %bb.0: # %entry
; CHECK-AVX2-NEXT:    movl %r9d, 12(%rdi)
; CHECK-AVX2-NEXT:    cmpl $18, %edx
; CHECK-AVX2-NEXT:    jl .LBB3_2
; CHECK-AVX2-NEXT:  # %bb.1: # %if.then
; CHECK-AVX2-NEXT:    movl %edx, 4(%rdi)
; CHECK-AVX2-NEXT:  .LBB3_2: # %if.end
; CHECK-AVX2-NEXT:    vmovups (%r8), %xmm0
; CHECK-AVX2-NEXT:    vmovups %xmm0, (%rcx)
; CHECK-AVX2-NEXT:    movl (%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, (%rsi)
; CHECK-AVX2-NEXT:    movl 4(%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, 4(%rsi)
; CHECK-AVX2-NEXT:    movl 8(%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, 8(%rsi)
; CHECK-AVX2-NEXT:    movl 12(%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, 12(%rsi)
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512-LABEL: test_2preds_block:
; CHECK-AVX512:       # %bb.0: # %entry
; CHECK-AVX512-NEXT:    movl %r9d, 12(%rdi)
; CHECK-AVX512-NEXT:    cmpl $18, %edx
; CHECK-AVX512-NEXT:    jl .LBB3_2
; CHECK-AVX512-NEXT:  # %bb.1: # %if.then
; CHECK-AVX512-NEXT:    movl %edx, 4(%rdi)
; CHECK-AVX512-NEXT:  .LBB3_2: # %if.end
; CHECK-AVX512-NEXT:    vmovups (%r8), %xmm0
; CHECK-AVX512-NEXT:    vmovups %xmm0, (%rcx)
; CHECK-AVX512-NEXT:    movl (%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, (%rsi)
; CHECK-AVX512-NEXT:    movl 4(%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, 4(%rsi)
; CHECK-AVX512-NEXT:    movl 8(%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, 8(%rsi)
; CHECK-AVX512-NEXT:    movl 12(%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, 12(%rsi)
; CHECK-AVX512-NEXT:    retq
entry:
  %d = getelementptr inbounds %struct.S, ptr %s1, i64 0, i32 3
  store i32 %x2, ptr %d, align 4
  %cmp = icmp sgt i32 %x, 17
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  %b = getelementptr inbounds %struct.S, ptr %s1, i64 0, i32 1
  store i32 %x, ptr %b, align 4
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  tail call void @llvm.memcpy.p0.p0.i64(ptr %s3, ptr %s4, i64 16, i32 4, i1 false)
  tail call void @llvm.memcpy.p0.p0.i64(ptr %s2, ptr %s1, i64 16, i32 4, i1 false)
  ret void
}
%struct.S2 = type { i64, i64 }

; Function Attrs: nounwind uwtable
; A conditional 8-byte store to offset 8 of %s1 (second i64 of %struct.S2),
; then a 16-byte copy from %s1 to %s2. Expected without the disable flag:
; the copy is emitted as two 8-byte pieces (offsets 0 and 8). Expected with
; --x86-disable-avoid-SFB: one 16-byte movups pair.
define void @test_type64(ptr nocapture noalias %s1, ptr nocapture %s2, i32 %x, ptr nocapture %s3, ptr nocapture readonly %s4) local_unnamed_addr #0 {
; CHECK-LABEL: test_type64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cmpl $18, %edx
; CHECK-NEXT:    jl .LBB4_2
; CHECK-NEXT:  # %bb.1: # %if.then
; CHECK-NEXT:    movslq %edx, %rax
; CHECK-NEXT:    movq %rax, 8(%rdi)
; CHECK-NEXT:  .LBB4_2: # %if.end
; CHECK-NEXT:    movups (%r8), %xmm0
; CHECK-NEXT:    movups %xmm0, (%rcx)
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq %rax, (%rsi)
; CHECK-NEXT:    movq 8(%rdi), %rax
; CHECK-NEXT:    movq %rax, 8(%rsi)
; CHECK-NEXT:    retq
;
; DISABLED-LABEL: test_type64:
; DISABLED:       # %bb.0: # %entry
; DISABLED-NEXT:    cmpl $18, %edx
; DISABLED-NEXT:    jl .LBB4_2
; DISABLED-NEXT:  # %bb.1: # %if.then
; DISABLED-NEXT:    movslq %edx, %rax
; DISABLED-NEXT:    movq %rax, 8(%rdi)
; DISABLED-NEXT:  .LBB4_2: # %if.end
; DISABLED-NEXT:    movups (%r8), %xmm0
; DISABLED-NEXT:    movups %xmm0, (%rcx)
; DISABLED-NEXT:    movups (%rdi), %xmm0
; DISABLED-NEXT:    movups %xmm0, (%rsi)
; DISABLED-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_type64:
; CHECK-AVX2:       # %bb.0: # %entry
; CHECK-AVX2-NEXT:    cmpl $18, %edx
; CHECK-AVX2-NEXT:    jl .LBB4_2
; CHECK-AVX2-NEXT:  # %bb.1: # %if.then
; CHECK-AVX2-NEXT:    movslq %edx, %rax
; CHECK-AVX2-NEXT:    movq %rax, 8(%rdi)
; CHECK-AVX2-NEXT:  .LBB4_2: # %if.end
; CHECK-AVX2-NEXT:    vmovups (%r8), %xmm0
; CHECK-AVX2-NEXT:    vmovups %xmm0, (%rcx)
; CHECK-AVX2-NEXT:    movq (%rdi), %rax
; CHECK-AVX2-NEXT:    movq %rax, (%rsi)
; CHECK-AVX2-NEXT:    movq 8(%rdi), %rax
; CHECK-AVX2-NEXT:    movq %rax, 8(%rsi)
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512-LABEL: test_type64:
; CHECK-AVX512:       # %bb.0: # %entry
; CHECK-AVX512-NEXT:    cmpl $18, %edx
; CHECK-AVX512-NEXT:    jl .LBB4_2
; CHECK-AVX512-NEXT:  # %bb.1: # %if.then
; CHECK-AVX512-NEXT:    movslq %edx, %rax
; CHECK-AVX512-NEXT:    movq %rax, 8(%rdi)
; CHECK-AVX512-NEXT:  .LBB4_2: # %if.end
; CHECK-AVX512-NEXT:    vmovups (%r8), %xmm0
; CHECK-AVX512-NEXT:    vmovups %xmm0, (%rcx)
; CHECK-AVX512-NEXT:    movq (%rdi), %rax
; CHECK-AVX512-NEXT:    movq %rax, (%rsi)
; CHECK-AVX512-NEXT:    movq 8(%rdi), %rax
; CHECK-AVX512-NEXT:    movq %rax, 8(%rsi)
; CHECK-AVX512-NEXT:    retq
entry:
  %cmp = icmp sgt i32 %x, 17
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  %conv = sext i32 %x to i64
  %b = getelementptr inbounds %struct.S2, ptr %s1, i64 0, i32 1
  store i64 %conv, ptr %b, align 8
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  tail call void @llvm.memcpy.p0.p0.i64(ptr %s3, ptr %s4, i64 16, i32 8, i1 false)
  tail call void @llvm.memcpy.p0.p0.i64(ptr %s2, ptr %s1, i64 16, i32 8, i1 false)
  ret void
}
%struct.S3 = type { i64, i8, i8, i16, i32 }

; Function Attrs: noinline nounwind uwtable
; Conditional stores of different widths into %s1: an i64 at offset 0 and an
; i8 at offset 8, followed by a 16-byte copy from %s1 to %s2. Expected
; without the disable flag: the copy is emitted as 8 + 1 + 4 + 2 + 1 byte
; pieces (offsets 0, 8, 9, 13, 15). Expected with --x86-disable-avoid-SFB:
; one 16-byte movups pair.
define void @test_mixed_type(ptr nocapture noalias %s1, ptr nocapture %s2, i32 %x, ptr nocapture readnone %s3, ptr nocapture readnone %s4) local_unnamed_addr #0 {
; CHECK-LABEL: test_mixed_type:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cmpl $18, %edx
; CHECK-NEXT:    jl .LBB5_2
; CHECK-NEXT:  # %bb.1: # %if.then
; CHECK-NEXT:    movslq %edx, %rax
; CHECK-NEXT:    movq %rax, (%rdi)
; CHECK-NEXT:    movb %dl, 8(%rdi)
; CHECK-NEXT:  .LBB5_2: # %if.end
; CHECK-NEXT:    movq (%rdi), %rax
; CHECK-NEXT:    movq %rax, (%rsi)
; CHECK-NEXT:    movzbl 8(%rdi), %eax
; CHECK-NEXT:    movb %al, 8(%rsi)
; CHECK-NEXT:    movl 9(%rdi), %eax
; CHECK-NEXT:    movl %eax, 9(%rsi)
; CHECK-NEXT:    movzwl 13(%rdi), %eax
; CHECK-NEXT:    movw %ax, 13(%rsi)
; CHECK-NEXT:    movzbl 15(%rdi), %eax
; CHECK-NEXT:    movb %al, 15(%rsi)
; CHECK-NEXT:    retq
;
; DISABLED-LABEL: test_mixed_type:
; DISABLED:       # %bb.0: # %entry
; DISABLED-NEXT:    cmpl $18, %edx
; DISABLED-NEXT:    jl .LBB5_2
; DISABLED-NEXT:  # %bb.1: # %if.then
; DISABLED-NEXT:    movslq %edx, %rax
; DISABLED-NEXT:    movq %rax, (%rdi)
; DISABLED-NEXT:    movb %dl, 8(%rdi)
; DISABLED-NEXT:  .LBB5_2: # %if.end
; DISABLED-NEXT:    movups (%rdi), %xmm0
; DISABLED-NEXT:    movups %xmm0, (%rsi)
; DISABLED-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_mixed_type:
; CHECK-AVX2:       # %bb.0: # %entry
; CHECK-AVX2-NEXT:    cmpl $18, %edx
; CHECK-AVX2-NEXT:    jl .LBB5_2
; CHECK-AVX2-NEXT:  # %bb.1: # %if.then
; CHECK-AVX2-NEXT:    movslq %edx, %rax
; CHECK-AVX2-NEXT:    movq %rax, (%rdi)
; CHECK-AVX2-NEXT:    movb %dl, 8(%rdi)
; CHECK-AVX2-NEXT:  .LBB5_2: # %if.end
; CHECK-AVX2-NEXT:    movq (%rdi), %rax
; CHECK-AVX2-NEXT:    movq %rax, (%rsi)
; CHECK-AVX2-NEXT:    movzbl 8(%rdi), %eax
; CHECK-AVX2-NEXT:    movb %al, 8(%rsi)
; CHECK-AVX2-NEXT:    movl 9(%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, 9(%rsi)
; CHECK-AVX2-NEXT:    movzwl 13(%rdi), %eax
; CHECK-AVX2-NEXT:    movw %ax, 13(%rsi)
; CHECK-AVX2-NEXT:    movzbl 15(%rdi), %eax
; CHECK-AVX2-NEXT:    movb %al, 15(%rsi)
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512-LABEL: test_mixed_type:
; CHECK-AVX512:       # %bb.0: # %entry
; CHECK-AVX512-NEXT:    cmpl $18, %edx
; CHECK-AVX512-NEXT:    jl .LBB5_2
; CHECK-AVX512-NEXT:  # %bb.1: # %if.then
; CHECK-AVX512-NEXT:    movslq %edx, %rax
; CHECK-AVX512-NEXT:    movq %rax, (%rdi)
; CHECK-AVX512-NEXT:    movb %dl, 8(%rdi)
; CHECK-AVX512-NEXT:  .LBB5_2: # %if.end
; CHECK-AVX512-NEXT:    movq (%rdi), %rax
; CHECK-AVX512-NEXT:    movq %rax, (%rsi)
; CHECK-AVX512-NEXT:    movzbl 8(%rdi), %eax
; CHECK-AVX512-NEXT:    movb %al, 8(%rsi)
; CHECK-AVX512-NEXT:    movl 9(%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, 9(%rsi)
; CHECK-AVX512-NEXT:    movzwl 13(%rdi), %eax
; CHECK-AVX512-NEXT:    movw %ax, 13(%rsi)
; CHECK-AVX512-NEXT:    movzbl 15(%rdi), %eax
; CHECK-AVX512-NEXT:    movb %al, 15(%rsi)
; CHECK-AVX512-NEXT:    retq
entry:
  %cmp = icmp sgt i32 %x, 17
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  %conv = sext i32 %x to i64
  store i64 %conv, ptr %s1, align 8
  %conv1 = trunc i32 %x to i8
  %b = getelementptr inbounds %struct.S3, ptr %s1, i64 0, i32 1
  store i8 %conv1, ptr %b, align 8
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  tail call void @llvm.memcpy.p0.p0.i64(ptr %s2, ptr %s1, i64 16, i32 8, i1 false)
  ret void
}
%struct.S4 = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }

; Function Attrs: nounwind uwtable
; Two immediate 4-byte stores into a 48-byte struct (offsets 4 and 36)
; followed by a 48-byte copy from %s1 to %s2. With --x86-disable-avoid-SFB
; the copy is expected as three 16-byte movups pairs. For -mcpu=x86-64
; without the flag, the chunks at offsets 32 and 0 are each expected as
; 4 + 4 + 8 byte pieces while the chunk at offset 16 stays a single 16-byte
; copy. The core-avx2 and skx expectations differ in the tail: after the
; 4-byte pieces at offsets 0 and 4 they expect a 16-byte copy at offset 8
; and an 8-byte copy at offset 24.
define void @test_multiple_blocks(ptr nocapture noalias %s1, ptr nocapture %s2) local_unnamed_addr #0 {
; CHECK-LABEL: test_multiple_blocks:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movl $0, 4(%rdi)
; CHECK-NEXT:    movl $0, 36(%rdi)
; CHECK-NEXT:    movups 16(%rdi), %xmm0
; CHECK-NEXT:    movups %xmm0, 16(%rsi)
; CHECK-NEXT:    movl 32(%rdi), %eax
; CHECK-NEXT:    movl %eax, 32(%rsi)
; CHECK-NEXT:    movl 36(%rdi), %eax
; CHECK-NEXT:    movl %eax, 36(%rsi)
; CHECK-NEXT:    movq 40(%rdi), %rax
; CHECK-NEXT:    movq %rax, 40(%rsi)
; CHECK-NEXT:    movl (%rdi), %eax
; CHECK-NEXT:    movl %eax, (%rsi)
; CHECK-NEXT:    movl 4(%rdi), %eax
; CHECK-NEXT:    movl %eax, 4(%rsi)
; CHECK-NEXT:    movq 8(%rdi), %rax
; CHECK-NEXT:    movq %rax, 8(%rsi)
; CHECK-NEXT:    retq
;
; DISABLED-LABEL: test_multiple_blocks:
; DISABLED:       # %bb.0: # %entry
; DISABLED-NEXT:    movl $0, 4(%rdi)
; DISABLED-NEXT:    movl $0, 36(%rdi)
; DISABLED-NEXT:    movups 16(%rdi), %xmm0
; DISABLED-NEXT:    movups %xmm0, 16(%rsi)
; DISABLED-NEXT:    movups 32(%rdi), %xmm0
; DISABLED-NEXT:    movups %xmm0, 32(%rsi)
; DISABLED-NEXT:    movups (%rdi), %xmm0
; DISABLED-NEXT:    movups %xmm0, (%rsi)
; DISABLED-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_multiple_blocks:
; CHECK-AVX2:       # %bb.0: # %entry
; CHECK-AVX2-NEXT:    movl $0, 4(%rdi)
; CHECK-AVX2-NEXT:    movl $0, 36(%rdi)
; CHECK-AVX2-NEXT:    vmovups 16(%rdi), %xmm0
; CHECK-AVX2-NEXT:    vmovups %xmm0, 16(%rsi)
; CHECK-AVX2-NEXT:    movl 32(%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, 32(%rsi)
; CHECK-AVX2-NEXT:    movl 36(%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, 36(%rsi)
; CHECK-AVX2-NEXT:    movq 40(%rdi), %rax
; CHECK-AVX2-NEXT:    movq %rax, 40(%rsi)
; CHECK-AVX2-NEXT:    movl (%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, (%rsi)
; CHECK-AVX2-NEXT:    movl 4(%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, 4(%rsi)
; CHECK-AVX2-NEXT:    vmovups 8(%rdi), %xmm0
; CHECK-AVX2-NEXT:    vmovups %xmm0, 8(%rsi)
; CHECK-AVX2-NEXT:    movq 24(%rdi), %rax
; CHECK-AVX2-NEXT:    movq %rax, 24(%rsi)
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512-LABEL: test_multiple_blocks:
; CHECK-AVX512:       # %bb.0: # %entry
; CHECK-AVX512-NEXT:    movl $0, 4(%rdi)
; CHECK-AVX512-NEXT:    movl $0, 36(%rdi)
; CHECK-AVX512-NEXT:    vmovups 16(%rdi), %xmm0
; CHECK-AVX512-NEXT:    vmovups %xmm0, 16(%rsi)
; CHECK-AVX512-NEXT:    movl 32(%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, 32(%rsi)
; CHECK-AVX512-NEXT:    movl 36(%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, 36(%rsi)
; CHECK-AVX512-NEXT:    movq 40(%rdi), %rax
; CHECK-AVX512-NEXT:    movq %rax, 40(%rsi)
; CHECK-AVX512-NEXT:    movl (%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, (%rsi)
; CHECK-AVX512-NEXT:    movl 4(%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, 4(%rsi)
; CHECK-AVX512-NEXT:    vmovups 8(%rdi), %xmm0
; CHECK-AVX512-NEXT:    vmovups %xmm0, 8(%rsi)
; CHECK-AVX512-NEXT:    movq 24(%rdi), %rax
; CHECK-AVX512-NEXT:    movq %rax, 24(%rsi)
; CHECK-AVX512-NEXT:    retq
entry:
  %b = getelementptr inbounds %struct.S4, ptr %s1, i64 0, i32 1
  store i32 0, ptr %b, align 4
  %b3 = getelementptr inbounds %struct.S4, ptr %s1, i64 0, i32 9
  store i32 0, ptr %b3, align 4
  tail call void @llvm.memcpy.p0.p0.i64(ptr %s2, ptr %s1, i64 48, i32 4, i1 false)
  ret void
}
%struct.S5 = type { i16, i16, i16, i16, i16, i16, i16, i16 }

; Function Attrs: nounwind uwtable
; A conditional 2-byte store to offset 2 of %s1, then a 16-byte copy from
; %s1 to %s2. Expected without the disable flag: the copy is emitted as
; 2 + 2 + 8 + 4 byte pieces (offsets 0, 2, 4, 12). Expected with
; --x86-disable-avoid-SFB: one 16-byte movups pair.
define void @test_type16(ptr nocapture noalias %s1, ptr nocapture %s2, i32 %x, ptr nocapture %s3, ptr nocapture readonly %s4) local_unnamed_addr #0 {
; CHECK-LABEL: test_type16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    cmpl $18, %edx
; CHECK-NEXT:    jl .LBB7_2
; CHECK-NEXT:  # %bb.1: # %if.then
; CHECK-NEXT:    movw %dx, 2(%rdi)
; CHECK-NEXT:  .LBB7_2: # %if.end
; CHECK-NEXT:    movups (%r8), %xmm0
; CHECK-NEXT:    movups %xmm0, (%rcx)
; CHECK-NEXT:    movzwl (%rdi), %eax
; CHECK-NEXT:    movw %ax, (%rsi)
; CHECK-NEXT:    movzwl 2(%rdi), %eax
; CHECK-NEXT:    movw %ax, 2(%rsi)
; CHECK-NEXT:    movq 4(%rdi), %rax
; CHECK-NEXT:    movq %rax, 4(%rsi)
; CHECK-NEXT:    movl 12(%rdi), %eax
; CHECK-NEXT:    movl %eax, 12(%rsi)
; CHECK-NEXT:    retq
;
; DISABLED-LABEL: test_type16:
; DISABLED:       # %bb.0: # %entry
; DISABLED-NEXT:    cmpl $18, %edx
; DISABLED-NEXT:    jl .LBB7_2
; DISABLED-NEXT:  # %bb.1: # %if.then
; DISABLED-NEXT:    movw %dx, 2(%rdi)
; DISABLED-NEXT:  .LBB7_2: # %if.end
; DISABLED-NEXT:    movups (%r8), %xmm0
; DISABLED-NEXT:    movups %xmm0, (%rcx)
; DISABLED-NEXT:    movups (%rdi), %xmm0
; DISABLED-NEXT:    movups %xmm0, (%rsi)
; DISABLED-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_type16:
; CHECK-AVX2:       # %bb.0: # %entry
; CHECK-AVX2-NEXT:    cmpl $18, %edx
; CHECK-AVX2-NEXT:    jl .LBB7_2
; CHECK-AVX2-NEXT:  # %bb.1: # %if.then
; CHECK-AVX2-NEXT:    movw %dx, 2(%rdi)
; CHECK-AVX2-NEXT:  .LBB7_2: # %if.end
; CHECK-AVX2-NEXT:    vmovups (%r8), %xmm0
; CHECK-AVX2-NEXT:    vmovups %xmm0, (%rcx)
; CHECK-AVX2-NEXT:    movzwl (%rdi), %eax
; CHECK-AVX2-NEXT:    movw %ax, (%rsi)
; CHECK-AVX2-NEXT:    movzwl 2(%rdi), %eax
; CHECK-AVX2-NEXT:    movw %ax, 2(%rsi)
; CHECK-AVX2-NEXT:    movq 4(%rdi), %rax
; CHECK-AVX2-NEXT:    movq %rax, 4(%rsi)
; CHECK-AVX2-NEXT:    movl 12(%rdi), %eax
; CHECK-AVX2-NEXT:    movl %eax, 12(%rsi)
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512-LABEL: test_type16:
; CHECK-AVX512:       # %bb.0: # %entry
; CHECK-AVX512-NEXT:    cmpl $18, %edx
; CHECK-AVX512-NEXT:    jl .LBB7_2
; CHECK-AVX512-NEXT:  # %bb.1: # %if.then
; CHECK-AVX512-NEXT:    movw %dx, 2(%rdi)
; CHECK-AVX512-NEXT:  .LBB7_2: # %if.end
; CHECK-AVX512-NEXT:    vmovups (%r8), %xmm0
; CHECK-AVX512-NEXT:    vmovups %xmm0, (%rcx)
; CHECK-AVX512-NEXT:    movzwl (%rdi), %eax
; CHECK-AVX512-NEXT:    movw %ax, (%rsi)
; CHECK-AVX512-NEXT:    movzwl 2(%rdi), %eax
; CHECK-AVX512-NEXT:    movw %ax, 2(%rsi)
; CHECK-AVX512-NEXT:    movq 4(%rdi), %rax
; CHECK-AVX512-NEXT:    movq %rax, 4(%rsi)
; CHECK-AVX512-NEXT:    movl 12(%rdi), %eax
; CHECK-AVX512-NEXT:    movl %eax, 12(%rsi)
; CHECK-AVX512-NEXT:    retq
entry:
  %cmp = icmp sgt i32 %x, 17
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  %conv = trunc i32 %x to i16
  %b = getelementptr inbounds %struct.S5, ptr %s1, i64 0, i32 1
  store i16 %conv, ptr %b, align 2
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  tail call void @llvm.memcpy.p0.p0.i64(ptr %s3, ptr %s4, i64 16, i32 2, i1 false)
  tail call void @llvm.memcpy.p0.p0.i64(ptr %s2, ptr %s1, i64 16, i32 2, i1 false)
  ret void
}

%struct.S6 = type { [4 x i32], i32, i32, i32, i32 }

; Function Attrs: nounwind uwtable
; Same pattern with byval arguments addressed off %rsp: a 4-byte store into
; field 3 of %s2, then 32-byte copies from %s2 to %agg.result and to %s1.
; Expected without the disable flag: the first 16 bytes of each copy stay
; one vector copy and the second 16 bytes are emitted as 8 + 4 + 4 byte
; pieces (the %agg.result stores land at offsets 16, 24, 28). Expected with
; --x86-disable-avoid-SFB: each copy is two 16-byte vector copies.
define void @test_stack(ptr noalias nocapture sret(%struct.S6) %agg.result, ptr byval(%struct.S6) nocapture readnone align 8 %s1, ptr byval(%struct.S6) nocapture align 8 %s2, i32 %x) local_unnamed_addr #0 {
; CHECK-LABEL: test_stack:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rdi, %rax
; CHECK-NEXT:    movl %esi, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
; CHECK-NEXT:    movups %xmm0, (%rdi)
; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
; CHECK-NEXT:    movq %rcx, 16(%rdi)
; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
; CHECK-NEXT:    movl %ecx, 24(%rdi)
; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
; CHECK-NEXT:    movl %ecx, 28(%rdi)
; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %edx
; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %esi
; CHECK-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    movl %edx, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    movl %esi, {{[0-9]+}}(%rsp)
; CHECK-NEXT:    retq
;
; DISABLED-LABEL: test_stack:
; DISABLED:       # %bb.0: # %entry
; DISABLED-NEXT:    movq %rdi, %rax
; DISABLED-NEXT:    movl %esi, {{[0-9]+}}(%rsp)
; DISABLED-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
; DISABLED-NEXT:    movups %xmm0, (%rdi)
; DISABLED-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
; DISABLED-NEXT:    movups %xmm0, 16(%rdi)
; DISABLED-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
; DISABLED-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
; DISABLED-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
; DISABLED-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
; DISABLED-NEXT:    retq
;
; CHECK-AVX2-LABEL: test_stack:
; CHECK-AVX2:       # %bb.0: # %entry
; CHECK-AVX2-NEXT:    movq %rdi, %rax
; CHECK-AVX2-NEXT:    movl %esi, {{[0-9]+}}(%rsp)
; CHECK-AVX2-NEXT:    vmovups {{[0-9]+}}(%rsp), %xmm0
; CHECK-AVX2-NEXT:    vmovups %xmm0, (%rdi)
; CHECK-AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
; CHECK-AVX2-NEXT:    movq %rcx, 16(%rdi)
; CHECK-AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
; CHECK-AVX2-NEXT:    movl %ecx, 24(%rdi)
; CHECK-AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
; CHECK-AVX2-NEXT:    movl %ecx, 28(%rdi)
; CHECK-AVX2-NEXT:    vmovups {{[0-9]+}}(%rsp), %xmm0
; CHECK-AVX2-NEXT:    vmovups %xmm0, {{[0-9]+}}(%rsp)
; CHECK-AVX2-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
; CHECK-AVX2-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
; CHECK-AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
; CHECK-AVX2-NEXT:    movl %ecx, {{[0-9]+}}(%rsp)
; CHECK-AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
; CHECK-AVX2-NEXT:    movl %ecx, {{[0-9]+}}(%rsp)
; CHECK-AVX2-NEXT:    retq
;
; CHECK-AVX512-LABEL: test_stack:
; CHECK-AVX512:       # %bb.0: # %entry
; CHECK-AVX512-NEXT:    movq %rdi, %rax
; CHECK-AVX512-NEXT:    movl %esi, {{[0-9]+}}(%rsp)
; CHECK-AVX512-NEXT:    vmovups {{[0-9]+}}(%rsp), %xmm0
; CHECK-AVX512-NEXT:    vmovups %xmm0, (%rdi)
; CHECK-AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
; CHECK-AVX512-NEXT:    movq %rcx, 16(%rdi)
; CHECK-AVX512-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
; CHECK-AVX512-NEXT:    movl %ecx, 24(%rdi)
; CHECK-AVX512-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
; CHECK-AVX512-NEXT:    movl %ecx, 28(%rdi)
; CHECK-AVX512-NEXT:    vmovups {{[0-9]+}}(%rsp), %xmm0
; CHECK-AVX512-NEXT:    vmovups %xmm0, {{[0-9]+}}(%rsp)
; CHECK-AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
; CHECK-AVX512-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
; CHECK-AVX512-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
; CHECK-AVX512-NEXT:    movl %ecx, {{[0-9]+}}(%rsp)
; CHECK-AVX512-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
; CHECK-AVX512-NEXT:    movl %ecx, {{[0-9]+}}(%rsp)
; CHECK-AVX512-NEXT:    retq
entry:
  %s6.sroa.3.0..sroa_idx4 = getelementptr inbounds %struct.S6, ptr %s2, i64 0, i32 3
  store i32 %x, ptr %s6.sroa.3.0..sroa_idx4, align 8
  call void @llvm.memcpy.p0.p0.i64(ptr %agg.result, ptr nonnull %s2, i64 32, i32 4, i1 false)
  call void @llvm.memcpy.p0.p0.i64(ptr nonnull %s1, ptr nonnull %s2, i64 32, i32 4, i1 false)

  ret void
}

; test_limit_all: entry stores %x2 to field 3 of %s1 and calls @bar; when
; %x > 17, if.then stores %x to field 1 of %s1 and calls @bar again. if.end
; then copies 16 bytes %s4 -> %s3 and 16 bytes %s1 -> %s2.
; In the expected assembly below the %s1 copy stays one 16-byte (v)movups
; load/store pair in all four run configurations, so the output with and
; without --x86-disable-avoid-SFB is the same for this function.
787; Function Attrs: nounwind uwtable
788define void @test_limit_all(ptr noalias  %s1, ptr nocapture %s2, i32 %x, ptr nocapture %s3, ptr nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 {
789; CHECK-LABEL: test_limit_all:
790; CHECK:       # %bb.0: # %entry
791; CHECK-NEXT:    pushq %rbp
792; CHECK-NEXT:    .cfi_def_cfa_offset 16
793; CHECK-NEXT:    pushq %r15
794; CHECK-NEXT:    .cfi_def_cfa_offset 24
795; CHECK-NEXT:    pushq %r14
796; CHECK-NEXT:    .cfi_def_cfa_offset 32
797; CHECK-NEXT:    pushq %r12
798; CHECK-NEXT:    .cfi_def_cfa_offset 40
799; CHECK-NEXT:    pushq %rbx
800; CHECK-NEXT:    .cfi_def_cfa_offset 48
801; CHECK-NEXT:    .cfi_offset %rbx, -48
802; CHECK-NEXT:    .cfi_offset %r12, -40
803; CHECK-NEXT:    .cfi_offset %r14, -32
804; CHECK-NEXT:    .cfi_offset %r15, -24
805; CHECK-NEXT:    .cfi_offset %rbp, -16
806; CHECK-NEXT:    movq %r8, %r15
807; CHECK-NEXT:    movq %rcx, %r14
808; CHECK-NEXT:    movl %edx, %ebp
809; CHECK-NEXT:    movq %rsi, %r12
810; CHECK-NEXT:    movq %rdi, %rbx
811; CHECK-NEXT:    movl %r9d, 12(%rdi)
812; CHECK-NEXT:    callq bar@PLT
813; CHECK-NEXT:    cmpl $18, %ebp
814; CHECK-NEXT:    jl .LBB9_2
815; CHECK-NEXT:  # %bb.1: # %if.then
816; CHECK-NEXT:    movl %ebp, 4(%rbx)
817; CHECK-NEXT:    movq %rbx, %rdi
818; CHECK-NEXT:    callq bar@PLT
819; CHECK-NEXT:  .LBB9_2: # %if.end
820; CHECK-NEXT:    movups (%r15), %xmm0
821; CHECK-NEXT:    movups %xmm0, (%r14)
822; CHECK-NEXT:    movups (%rbx), %xmm0
823; CHECK-NEXT:    movups %xmm0, (%r12)
824; CHECK-NEXT:    popq %rbx
825; CHECK-NEXT:    .cfi_def_cfa_offset 40
826; CHECK-NEXT:    popq %r12
827; CHECK-NEXT:    .cfi_def_cfa_offset 32
828; CHECK-NEXT:    popq %r14
829; CHECK-NEXT:    .cfi_def_cfa_offset 24
830; CHECK-NEXT:    popq %r15
831; CHECK-NEXT:    .cfi_def_cfa_offset 16
832; CHECK-NEXT:    popq %rbp
833; CHECK-NEXT:    .cfi_def_cfa_offset 8
834; CHECK-NEXT:    retq
835;
836; DISABLED-LABEL: test_limit_all:
837; DISABLED:       # %bb.0: # %entry
838; DISABLED-NEXT:    pushq %rbp
839; DISABLED-NEXT:    .cfi_def_cfa_offset 16
840; DISABLED-NEXT:    pushq %r15
841; DISABLED-NEXT:    .cfi_def_cfa_offset 24
842; DISABLED-NEXT:    pushq %r14
843; DISABLED-NEXT:    .cfi_def_cfa_offset 32
844; DISABLED-NEXT:    pushq %r12
845; DISABLED-NEXT:    .cfi_def_cfa_offset 40
846; DISABLED-NEXT:    pushq %rbx
847; DISABLED-NEXT:    .cfi_def_cfa_offset 48
848; DISABLED-NEXT:    .cfi_offset %rbx, -48
849; DISABLED-NEXT:    .cfi_offset %r12, -40
850; DISABLED-NEXT:    .cfi_offset %r14, -32
851; DISABLED-NEXT:    .cfi_offset %r15, -24
852; DISABLED-NEXT:    .cfi_offset %rbp, -16
853; DISABLED-NEXT:    movq %r8, %r15
854; DISABLED-NEXT:    movq %rcx, %r14
855; DISABLED-NEXT:    movl %edx, %ebp
856; DISABLED-NEXT:    movq %rsi, %r12
857; DISABLED-NEXT:    movq %rdi, %rbx
858; DISABLED-NEXT:    movl %r9d, 12(%rdi)
859; DISABLED-NEXT:    callq bar@PLT
860; DISABLED-NEXT:    cmpl $18, %ebp
861; DISABLED-NEXT:    jl .LBB9_2
862; DISABLED-NEXT:  # %bb.1: # %if.then
863; DISABLED-NEXT:    movl %ebp, 4(%rbx)
864; DISABLED-NEXT:    movq %rbx, %rdi
865; DISABLED-NEXT:    callq bar@PLT
866; DISABLED-NEXT:  .LBB9_2: # %if.end
867; DISABLED-NEXT:    movups (%r15), %xmm0
868; DISABLED-NEXT:    movups %xmm0, (%r14)
869; DISABLED-NEXT:    movups (%rbx), %xmm0
870; DISABLED-NEXT:    movups %xmm0, (%r12)
871; DISABLED-NEXT:    popq %rbx
872; DISABLED-NEXT:    .cfi_def_cfa_offset 40
873; DISABLED-NEXT:    popq %r12
874; DISABLED-NEXT:    .cfi_def_cfa_offset 32
875; DISABLED-NEXT:    popq %r14
876; DISABLED-NEXT:    .cfi_def_cfa_offset 24
877; DISABLED-NEXT:    popq %r15
878; DISABLED-NEXT:    .cfi_def_cfa_offset 16
879; DISABLED-NEXT:    popq %rbp
880; DISABLED-NEXT:    .cfi_def_cfa_offset 8
881; DISABLED-NEXT:    retq
882;
883; CHECK-AVX2-LABEL: test_limit_all:
884; CHECK-AVX2:       # %bb.0: # %entry
885; CHECK-AVX2-NEXT:    pushq %rbp
886; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 16
887; CHECK-AVX2-NEXT:    pushq %r15
888; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 24
889; CHECK-AVX2-NEXT:    pushq %r14
890; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 32
891; CHECK-AVX2-NEXT:    pushq %r12
892; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 40
893; CHECK-AVX2-NEXT:    pushq %rbx
894; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 48
895; CHECK-AVX2-NEXT:    .cfi_offset %rbx, -48
896; CHECK-AVX2-NEXT:    .cfi_offset %r12, -40
897; CHECK-AVX2-NEXT:    .cfi_offset %r14, -32
898; CHECK-AVX2-NEXT:    .cfi_offset %r15, -24
899; CHECK-AVX2-NEXT:    .cfi_offset %rbp, -16
900; CHECK-AVX2-NEXT:    movq %r8, %r15
901; CHECK-AVX2-NEXT:    movq %rcx, %r14
902; CHECK-AVX2-NEXT:    movl %edx, %ebp
903; CHECK-AVX2-NEXT:    movq %rsi, %r12
904; CHECK-AVX2-NEXT:    movq %rdi, %rbx
905; CHECK-AVX2-NEXT:    movl %r9d, 12(%rdi)
906; CHECK-AVX2-NEXT:    callq bar@PLT
907; CHECK-AVX2-NEXT:    cmpl $18, %ebp
908; CHECK-AVX2-NEXT:    jl .LBB9_2
909; CHECK-AVX2-NEXT:  # %bb.1: # %if.then
910; CHECK-AVX2-NEXT:    movl %ebp, 4(%rbx)
911; CHECK-AVX2-NEXT:    movq %rbx, %rdi
912; CHECK-AVX2-NEXT:    callq bar@PLT
913; CHECK-AVX2-NEXT:  .LBB9_2: # %if.end
914; CHECK-AVX2-NEXT:    vmovups (%r15), %xmm0
915; CHECK-AVX2-NEXT:    vmovups %xmm0, (%r14)
916; CHECK-AVX2-NEXT:    vmovups (%rbx), %xmm0
917; CHECK-AVX2-NEXT:    vmovups %xmm0, (%r12)
918; CHECK-AVX2-NEXT:    popq %rbx
919; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 40
920; CHECK-AVX2-NEXT:    popq %r12
921; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 32
922; CHECK-AVX2-NEXT:    popq %r14
923; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 24
924; CHECK-AVX2-NEXT:    popq %r15
925; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 16
926; CHECK-AVX2-NEXT:    popq %rbp
927; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 8
928; CHECK-AVX2-NEXT:    retq
929;
930; CHECK-AVX512-LABEL: test_limit_all:
931; CHECK-AVX512:       # %bb.0: # %entry
932; CHECK-AVX512-NEXT:    pushq %rbp
933; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 16
934; CHECK-AVX512-NEXT:    pushq %r15
935; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 24
936; CHECK-AVX512-NEXT:    pushq %r14
937; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 32
938; CHECK-AVX512-NEXT:    pushq %r12
939; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 40
940; CHECK-AVX512-NEXT:    pushq %rbx
941; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 48
942; CHECK-AVX512-NEXT:    .cfi_offset %rbx, -48
943; CHECK-AVX512-NEXT:    .cfi_offset %r12, -40
944; CHECK-AVX512-NEXT:    .cfi_offset %r14, -32
945; CHECK-AVX512-NEXT:    .cfi_offset %r15, -24
946; CHECK-AVX512-NEXT:    .cfi_offset %rbp, -16
947; CHECK-AVX512-NEXT:    movq %r8, %r15
948; CHECK-AVX512-NEXT:    movq %rcx, %r14
949; CHECK-AVX512-NEXT:    movl %edx, %ebp
950; CHECK-AVX512-NEXT:    movq %rsi, %r12
951; CHECK-AVX512-NEXT:    movq %rdi, %rbx
952; CHECK-AVX512-NEXT:    movl %r9d, 12(%rdi)
953; CHECK-AVX512-NEXT:    callq bar@PLT
954; CHECK-AVX512-NEXT:    cmpl $18, %ebp
955; CHECK-AVX512-NEXT:    jl .LBB9_2
956; CHECK-AVX512-NEXT:  # %bb.1: # %if.then
957; CHECK-AVX512-NEXT:    movl %ebp, 4(%rbx)
958; CHECK-AVX512-NEXT:    movq %rbx, %rdi
959; CHECK-AVX512-NEXT:    callq bar@PLT
960; CHECK-AVX512-NEXT:  .LBB9_2: # %if.end
961; CHECK-AVX512-NEXT:    vmovups (%r15), %xmm0
962; CHECK-AVX512-NEXT:    vmovups %xmm0, (%r14)
963; CHECK-AVX512-NEXT:    vmovups (%rbx), %xmm0
964; CHECK-AVX512-NEXT:    vmovups %xmm0, (%r12)
965; CHECK-AVX512-NEXT:    popq %rbx
966; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 40
967; CHECK-AVX512-NEXT:    popq %r12
968; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 32
969; CHECK-AVX512-NEXT:    popq %r14
970; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 24
971; CHECK-AVX512-NEXT:    popq %r15
972; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 16
973; CHECK-AVX512-NEXT:    popq %rbp
974; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 8
975; CHECK-AVX512-NEXT:    retq
976entry:
977  %d = getelementptr inbounds %struct.S, ptr %s1, i64 0, i32 3
978  store i32 %x2, ptr %d, align 4
979  tail call void @bar(ptr %s1) #3
980  %cmp = icmp sgt i32 %x, 17
981  br i1 %cmp, label %if.then, label %if.end
982
983if.then:                                          ; preds = %entry
984  %b = getelementptr inbounds %struct.S, ptr %s1, i64 0, i32 1
985  store i32 %x, ptr %b, align 4
986  tail call void @bar(ptr nonnull %s1) #3
987  br label %if.end
988
989if.end:                                           ; preds = %if.then, %entry
990  tail call void @llvm.memcpy.p0.p0.i64(ptr %s3, ptr %s4, i64 16, i32 4, i1 false)
991  tail call void @llvm.memcpy.p0.p0.i64(ptr %s2, ptr %s1, i64 16, i32 4, i1 false)
992  ret void
993}
994
; test_limit_one_pred: entry stores %x2 to field 3 of %s1 with no call after
; it; if.then (taken when %x > 17) stores %x to field 1 of %s1 and then calls
; @bar. if.end copies 16 bytes %s4 -> %s3 and 16 bytes %s1 -> %s2.
; Expected assembly for the %s1 copy:
;   - x86-64, core-avx2 and skx runs: an 8-byte move for bytes 0-7, then
;     4-byte moves at offsets 8 and 12.
;   - run with --x86-disable-avoid-SFB: one 16-byte movups load/store pair.
995; Function Attrs: nounwind uwtable
996define void @test_limit_one_pred(ptr noalias %s1, ptr nocapture %s2, i32 %x, ptr nocapture %s3, ptr nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 {
997; CHECK-LABEL: test_limit_one_pred:
998; CHECK:       # %bb.0: # %entry
999; CHECK-NEXT:    pushq %r15
1000; CHECK-NEXT:    .cfi_def_cfa_offset 16
1001; CHECK-NEXT:    pushq %r14
1002; CHECK-NEXT:    .cfi_def_cfa_offset 24
1003; CHECK-NEXT:    pushq %r12
1004; CHECK-NEXT:    .cfi_def_cfa_offset 32
1005; CHECK-NEXT:    pushq %rbx
1006; CHECK-NEXT:    .cfi_def_cfa_offset 40
1007; CHECK-NEXT:    pushq %rax
1008; CHECK-NEXT:    .cfi_def_cfa_offset 48
1009; CHECK-NEXT:    .cfi_offset %rbx, -40
1010; CHECK-NEXT:    .cfi_offset %r12, -32
1011; CHECK-NEXT:    .cfi_offset %r14, -24
1012; CHECK-NEXT:    .cfi_offset %r15, -16
1013; CHECK-NEXT:    movq %r8, %r12
1014; CHECK-NEXT:    movq %rcx, %r15
1015; CHECK-NEXT:    movq %rsi, %r14
1016; CHECK-NEXT:    movq %rdi, %rbx
1017; CHECK-NEXT:    movl %r9d, 12(%rdi)
1018; CHECK-NEXT:    cmpl $18, %edx
1019; CHECK-NEXT:    jl .LBB10_2
1020; CHECK-NEXT:  # %bb.1: # %if.then
1021; CHECK-NEXT:    movl %edx, 4(%rbx)
1022; CHECK-NEXT:    movq %rbx, %rdi
1023; CHECK-NEXT:    callq bar@PLT
1024; CHECK-NEXT:  .LBB10_2: # %if.end
1025; CHECK-NEXT:    movups (%r12), %xmm0
1026; CHECK-NEXT:    movups %xmm0, (%r15)
1027; CHECK-NEXT:    movq (%rbx), %rax
1028; CHECK-NEXT:    movq %rax, (%r14)
1029; CHECK-NEXT:    movl 8(%rbx), %eax
1030; CHECK-NEXT:    movl %eax, 8(%r14)
1031; CHECK-NEXT:    movl 12(%rbx), %eax
1032; CHECK-NEXT:    movl %eax, 12(%r14)
1033; CHECK-NEXT:    addq $8, %rsp
1034; CHECK-NEXT:    .cfi_def_cfa_offset 40
1035; CHECK-NEXT:    popq %rbx
1036; CHECK-NEXT:    .cfi_def_cfa_offset 32
1037; CHECK-NEXT:    popq %r12
1038; CHECK-NEXT:    .cfi_def_cfa_offset 24
1039; CHECK-NEXT:    popq %r14
1040; CHECK-NEXT:    .cfi_def_cfa_offset 16
1041; CHECK-NEXT:    popq %r15
1042; CHECK-NEXT:    .cfi_def_cfa_offset 8
1043; CHECK-NEXT:    retq
1044;
1045; DISABLED-LABEL: test_limit_one_pred:
1046; DISABLED:       # %bb.0: # %entry
1047; DISABLED-NEXT:    pushq %r15
1048; DISABLED-NEXT:    .cfi_def_cfa_offset 16
1049; DISABLED-NEXT:    pushq %r14
1050; DISABLED-NEXT:    .cfi_def_cfa_offset 24
1051; DISABLED-NEXT:    pushq %r12
1052; DISABLED-NEXT:    .cfi_def_cfa_offset 32
1053; DISABLED-NEXT:    pushq %rbx
1054; DISABLED-NEXT:    .cfi_def_cfa_offset 40
1055; DISABLED-NEXT:    pushq %rax
1056; DISABLED-NEXT:    .cfi_def_cfa_offset 48
1057; DISABLED-NEXT:    .cfi_offset %rbx, -40
1058; DISABLED-NEXT:    .cfi_offset %r12, -32
1059; DISABLED-NEXT:    .cfi_offset %r14, -24
1060; DISABLED-NEXT:    .cfi_offset %r15, -16
1061; DISABLED-NEXT:    movq %r8, %r15
1062; DISABLED-NEXT:    movq %rcx, %r14
1063; DISABLED-NEXT:    movq %rsi, %r12
1064; DISABLED-NEXT:    movq %rdi, %rbx
1065; DISABLED-NEXT:    movl %r9d, 12(%rdi)
1066; DISABLED-NEXT:    cmpl $18, %edx
1067; DISABLED-NEXT:    jl .LBB10_2
1068; DISABLED-NEXT:  # %bb.1: # %if.then
1069; DISABLED-NEXT:    movl %edx, 4(%rbx)
1070; DISABLED-NEXT:    movq %rbx, %rdi
1071; DISABLED-NEXT:    callq bar@PLT
1072; DISABLED-NEXT:  .LBB10_2: # %if.end
1073; DISABLED-NEXT:    movups (%r15), %xmm0
1074; DISABLED-NEXT:    movups %xmm0, (%r14)
1075; DISABLED-NEXT:    movups (%rbx), %xmm0
1076; DISABLED-NEXT:    movups %xmm0, (%r12)
1077; DISABLED-NEXT:    addq $8, %rsp
1078; DISABLED-NEXT:    .cfi_def_cfa_offset 40
1079; DISABLED-NEXT:    popq %rbx
1080; DISABLED-NEXT:    .cfi_def_cfa_offset 32
1081; DISABLED-NEXT:    popq %r12
1082; DISABLED-NEXT:    .cfi_def_cfa_offset 24
1083; DISABLED-NEXT:    popq %r14
1084; DISABLED-NEXT:    .cfi_def_cfa_offset 16
1085; DISABLED-NEXT:    popq %r15
1086; DISABLED-NEXT:    .cfi_def_cfa_offset 8
1087; DISABLED-NEXT:    retq
1088;
1089; CHECK-AVX2-LABEL: test_limit_one_pred:
1090; CHECK-AVX2:       # %bb.0: # %entry
1091; CHECK-AVX2-NEXT:    pushq %r15
1092; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 16
1093; CHECK-AVX2-NEXT:    pushq %r14
1094; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 24
1095; CHECK-AVX2-NEXT:    pushq %r12
1096; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 32
1097; CHECK-AVX2-NEXT:    pushq %rbx
1098; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 40
1099; CHECK-AVX2-NEXT:    pushq %rax
1100; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 48
1101; CHECK-AVX2-NEXT:    .cfi_offset %rbx, -40
1102; CHECK-AVX2-NEXT:    .cfi_offset %r12, -32
1103; CHECK-AVX2-NEXT:    .cfi_offset %r14, -24
1104; CHECK-AVX2-NEXT:    .cfi_offset %r15, -16
1105; CHECK-AVX2-NEXT:    movq %r8, %r12
1106; CHECK-AVX2-NEXT:    movq %rcx, %r15
1107; CHECK-AVX2-NEXT:    movq %rsi, %r14
1108; CHECK-AVX2-NEXT:    movq %rdi, %rbx
1109; CHECK-AVX2-NEXT:    movl %r9d, 12(%rdi)
1110; CHECK-AVX2-NEXT:    cmpl $18, %edx
1111; CHECK-AVX2-NEXT:    jl .LBB10_2
1112; CHECK-AVX2-NEXT:  # %bb.1: # %if.then
1113; CHECK-AVX2-NEXT:    movl %edx, 4(%rbx)
1114; CHECK-AVX2-NEXT:    movq %rbx, %rdi
1115; CHECK-AVX2-NEXT:    callq bar@PLT
1116; CHECK-AVX2-NEXT:  .LBB10_2: # %if.end
1117; CHECK-AVX2-NEXT:    vmovups (%r12), %xmm0
1118; CHECK-AVX2-NEXT:    vmovups %xmm0, (%r15)
1119; CHECK-AVX2-NEXT:    movq (%rbx), %rax
1120; CHECK-AVX2-NEXT:    movq %rax, (%r14)
1121; CHECK-AVX2-NEXT:    movl 8(%rbx), %eax
1122; CHECK-AVX2-NEXT:    movl %eax, 8(%r14)
1123; CHECK-AVX2-NEXT:    movl 12(%rbx), %eax
1124; CHECK-AVX2-NEXT:    movl %eax, 12(%r14)
1125; CHECK-AVX2-NEXT:    addq $8, %rsp
1126; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 40
1127; CHECK-AVX2-NEXT:    popq %rbx
1128; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 32
1129; CHECK-AVX2-NEXT:    popq %r12
1130; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 24
1131; CHECK-AVX2-NEXT:    popq %r14
1132; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 16
1133; CHECK-AVX2-NEXT:    popq %r15
1134; CHECK-AVX2-NEXT:    .cfi_def_cfa_offset 8
1135; CHECK-AVX2-NEXT:    retq
1136;
1137; CHECK-AVX512-LABEL: test_limit_one_pred:
1138; CHECK-AVX512:       # %bb.0: # %entry
1139; CHECK-AVX512-NEXT:    pushq %r15
1140; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 16
1141; CHECK-AVX512-NEXT:    pushq %r14
1142; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 24
1143; CHECK-AVX512-NEXT:    pushq %r12
1144; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 32
1145; CHECK-AVX512-NEXT:    pushq %rbx
1146; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 40
1147; CHECK-AVX512-NEXT:    pushq %rax
1148; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 48
1149; CHECK-AVX512-NEXT:    .cfi_offset %rbx, -40
1150; CHECK-AVX512-NEXT:    .cfi_offset %r12, -32
1151; CHECK-AVX512-NEXT:    .cfi_offset %r14, -24
1152; CHECK-AVX512-NEXT:    .cfi_offset %r15, -16
1153; CHECK-AVX512-NEXT:    movq %r8, %r12
1154; CHECK-AVX512-NEXT:    movq %rcx, %r15
1155; CHECK-AVX512-NEXT:    movq %rsi, %r14
1156; CHECK-AVX512-NEXT:    movq %rdi, %rbx
1157; CHECK-AVX512-NEXT:    movl %r9d, 12(%rdi)
1158; CHECK-AVX512-NEXT:    cmpl $18, %edx
1159; CHECK-AVX512-NEXT:    jl .LBB10_2
1160; CHECK-AVX512-NEXT:  # %bb.1: # %if.then
1161; CHECK-AVX512-NEXT:    movl %edx, 4(%rbx)
1162; CHECK-AVX512-NEXT:    movq %rbx, %rdi
1163; CHECK-AVX512-NEXT:    callq bar@PLT
1164; CHECK-AVX512-NEXT:  .LBB10_2: # %if.end
1165; CHECK-AVX512-NEXT:    vmovups (%r12), %xmm0
1166; CHECK-AVX512-NEXT:    vmovups %xmm0, (%r15)
1167; CHECK-AVX512-NEXT:    movq (%rbx), %rax
1168; CHECK-AVX512-NEXT:    movq %rax, (%r14)
1169; CHECK-AVX512-NEXT:    movl 8(%rbx), %eax
1170; CHECK-AVX512-NEXT:    movl %eax, 8(%r14)
1171; CHECK-AVX512-NEXT:    movl 12(%rbx), %eax
1172; CHECK-AVX512-NEXT:    movl %eax, 12(%r14)
1173; CHECK-AVX512-NEXT:    addq $8, %rsp
1174; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 40
1175; CHECK-AVX512-NEXT:    popq %rbx
1176; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 32
1177; CHECK-AVX512-NEXT:    popq %r12
1178; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 24
1179; CHECK-AVX512-NEXT:    popq %r14
1180; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 16
1181; CHECK-AVX512-NEXT:    popq %r15
1182; CHECK-AVX512-NEXT:    .cfi_def_cfa_offset 8
1183; CHECK-AVX512-NEXT:    retq
1184entry:
1185  %d = getelementptr inbounds %struct.S, ptr %s1, i64 0, i32 3
1186  store i32 %x2, ptr %d, align 4
1187  %cmp = icmp sgt i32 %x, 17
1188  br i1 %cmp, label %if.then, label %if.end
1189
1190if.then:                                          ; preds = %entry
1191  %b = getelementptr inbounds %struct.S, ptr %s1, i64 0, i32 1
1192  store i32 %x, ptr %b, align 4
1193  tail call void @bar(ptr nonnull %s1) #3
1194  br label %if.end
1195
1196if.end:                                           ; preds = %if.then, %entry
1197  tail call void @llvm.memcpy.p0.p0.i64(ptr %s3, ptr %s4, i64 16, i32 4, i1 false)
1198  tail call void @llvm.memcpy.p0.p0.i64(ptr %s2, ptr %s1, i64 16, i32 4, i1 false)
1199  ret void
1200}
1201
1202
; External function with no body in this file; test_limit_all and
; test_limit_one_pred call it on %s1 between their narrow stores and the
; 16-byte copy out of %s1.
1203declare void @bar(ptr) local_unnamed_addr #1
1204
1205
; Eight float fields. test_conditional_block_float stores to field 1 of a
; %struct.S7 and then copies 32 bytes starting at the struct's base.
1206%struct.S7 = type { float, float, float , float, float, float, float, float }
1207
; test_conditional_block_float: when %x > 17, if.then stores the constant 1.0
; to field 1 of %s1 (emitted as a 4-byte immediate store at offset 4). if.end
; copies 32 bytes %s4 -> %s3 and then 32 bytes %s1 -> %s2. The %y parameter is
; not referenced in the body.
; Expected assembly for the %s1 copy:
;   - x86-64 run: 4-byte moves at offsets 0 and 4, an 8-byte move at offset 8,
;     and a 16-byte movups at offset 16.
;   - core-avx2 and skx runs: 4-byte moves at offsets 0 and 4, a 16-byte
;     vmovups at offset 8, and an 8-byte move at offset 24.
;   - run with --x86-disable-avoid-SFB: two 16-byte movups load/store pairs.
1208; Function Attrs: nounwind uwtable
1209define void @test_conditional_block_float(ptr nocapture noalias %s1, ptr nocapture %s2, i32 %x, ptr nocapture %s3, ptr nocapture readonly %s4, float %y) local_unnamed_addr #0 {
1210; CHECK-LABEL: test_conditional_block_float:
1211; CHECK:       # %bb.0: # %entry
1212; CHECK-NEXT:    cmpl $18, %edx
1213; CHECK-NEXT:    jl .LBB11_2
1214; CHECK-NEXT:  # %bb.1: # %if.then
1215; CHECK-NEXT:    movl $1065353216, 4(%rdi) # imm = 0x3F800000
1216; CHECK-NEXT:  .LBB11_2: # %if.end
1217; CHECK-NEXT:    movups (%r8), %xmm0
1218; CHECK-NEXT:    movups 16(%r8), %xmm1
1219; CHECK-NEXT:    movups %xmm1, 16(%rcx)
1220; CHECK-NEXT:    movups %xmm0, (%rcx)
1221; CHECK-NEXT:    movl (%rdi), %eax
1222; CHECK-NEXT:    movl 4(%rdi), %ecx
1223; CHECK-NEXT:    movq 8(%rdi), %rdx
1224; CHECK-NEXT:    movups 16(%rdi), %xmm0
1225; CHECK-NEXT:    movups %xmm0, 16(%rsi)
1226; CHECK-NEXT:    movl %eax, (%rsi)
1227; CHECK-NEXT:    movl %ecx, 4(%rsi)
1228; CHECK-NEXT:    movq %rdx, 8(%rsi)
1229; CHECK-NEXT:    retq
1230;
1231; DISABLED-LABEL: test_conditional_block_float:
1232; DISABLED:       # %bb.0: # %entry
1233; DISABLED-NEXT:    cmpl $18, %edx
1234; DISABLED-NEXT:    jl .LBB11_2
1235; DISABLED-NEXT:  # %bb.1: # %if.then
1236; DISABLED-NEXT:    movl $1065353216, 4(%rdi) # imm = 0x3F800000
1237; DISABLED-NEXT:  .LBB11_2: # %if.end
1238; DISABLED-NEXT:    movups (%r8), %xmm0
1239; DISABLED-NEXT:    movups 16(%r8), %xmm1
1240; DISABLED-NEXT:    movups %xmm1, 16(%rcx)
1241; DISABLED-NEXT:    movups %xmm0, (%rcx)
1242; DISABLED-NEXT:    movups (%rdi), %xmm0
1243; DISABLED-NEXT:    movups 16(%rdi), %xmm1
1244; DISABLED-NEXT:    movups %xmm1, 16(%rsi)
1245; DISABLED-NEXT:    movups %xmm0, (%rsi)
1246; DISABLED-NEXT:    retq
1247;
1248; CHECK-AVX2-LABEL: test_conditional_block_float:
1249; CHECK-AVX2:       # %bb.0: # %entry
1250; CHECK-AVX2-NEXT:    cmpl $18, %edx
1251; CHECK-AVX2-NEXT:    jl .LBB11_2
1252; CHECK-AVX2-NEXT:  # %bb.1: # %if.then
1253; CHECK-AVX2-NEXT:    movl $1065353216, 4(%rdi) # imm = 0x3F800000
1254; CHECK-AVX2-NEXT:  .LBB11_2: # %if.end
1255; CHECK-AVX2-NEXT:    vmovups (%r8), %ymm0
1256; CHECK-AVX2-NEXT:    vmovups %ymm0, (%rcx)
1257; CHECK-AVX2-NEXT:    movl (%rdi), %eax
1258; CHECK-AVX2-NEXT:    movl %eax, (%rsi)
1259; CHECK-AVX2-NEXT:    movl 4(%rdi), %eax
1260; CHECK-AVX2-NEXT:    movl %eax, 4(%rsi)
1261; CHECK-AVX2-NEXT:    vmovups 8(%rdi), %xmm0
1262; CHECK-AVX2-NEXT:    vmovups %xmm0, 8(%rsi)
1263; CHECK-AVX2-NEXT:    movq 24(%rdi), %rax
1264; CHECK-AVX2-NEXT:    movq %rax, 24(%rsi)
1265; CHECK-AVX2-NEXT:    vzeroupper
1266; CHECK-AVX2-NEXT:    retq
1267;
1268; CHECK-AVX512-LABEL: test_conditional_block_float:
1269; CHECK-AVX512:       # %bb.0: # %entry
1270; CHECK-AVX512-NEXT:    cmpl $18, %edx
1271; CHECK-AVX512-NEXT:    jl .LBB11_2
1272; CHECK-AVX512-NEXT:  # %bb.1: # %if.then
1273; CHECK-AVX512-NEXT:    movl $1065353216, 4(%rdi) # imm = 0x3F800000
1274; CHECK-AVX512-NEXT:  .LBB11_2: # %if.end
1275; CHECK-AVX512-NEXT:    vmovups (%r8), %ymm0
1276; CHECK-AVX512-NEXT:    vmovups %ymm0, (%rcx)
1277; CHECK-AVX512-NEXT:    movl (%rdi), %eax
1278; CHECK-AVX512-NEXT:    movl %eax, (%rsi)
1279; CHECK-AVX512-NEXT:    movl 4(%rdi), %eax
1280; CHECK-AVX512-NEXT:    movl %eax, 4(%rsi)
1281; CHECK-AVX512-NEXT:    vmovups 8(%rdi), %xmm0
1282; CHECK-AVX512-NEXT:    vmovups %xmm0, 8(%rsi)
1283; CHECK-AVX512-NEXT:    movq 24(%rdi), %rax
1284; CHECK-AVX512-NEXT:    movq %rax, 24(%rsi)
1285; CHECK-AVX512-NEXT:    vzeroupper
1286; CHECK-AVX512-NEXT:    retq
1287entry:
1288  %cmp = icmp sgt i32 %x, 17
1289  br i1 %cmp, label %if.then, label %if.end
1290
1291if.then:                                          ; preds = %entry
1292  %b = getelementptr inbounds %struct.S7, ptr %s1, i64 0, i32 1
1293  store float 1.0, ptr %b, align 4
1294  br label %if.end
1295
1296if.end:                                           ; preds = %if.then, %entry
1297  tail call void @llvm.memcpy.p0.p0.i64(ptr %s3, ptr %s4, i64 32, i32 4, i1 false)
1298  tail call void @llvm.memcpy.p0.p0.i64(ptr %s2, ptr %s1, i64 32, i32 4, i1 false)
1299  ret void
1300}
1301
; Six i64 fields. test_conditional_block_ymm stores to field 1 of a
; %struct.S8 and copies only 32 bytes starting at the struct's base.
1302%struct.S8 = type { i64, i64, i64, i64, i64, i64 }
1303
; test_conditional_block_ymm: when %x > 17, if.then stores i64 1 to field 1 of
; %s1 (an 8-byte store at offset 8). if.end copies 32 bytes %s4 -> %s3 and
; then 32 bytes %s1 -> %s2.
; Expected assembly for the %s1 copy:
;   - x86-64, core-avx2 and skx runs: 8-byte moves at offsets 0 and 8, then a
;     16-byte (v)movups at offset 16.
;   - run with --x86-disable-avoid-SFB: two 16-byte movups load/store pairs.
; In the core-avx2 and skx runs the %s4 -> %s3 copy uses one 32-byte ymm
; load/store, followed by vzeroupper before the return.
1304; Function Attrs: nounwind uwtable
1305define void @test_conditional_block_ymm(ptr nocapture noalias %s1, ptr nocapture %s2, i32 %x, ptr nocapture %s3, ptr nocapture readonly %s4) local_unnamed_addr #0 {
1306; CHECK-LABEL: test_conditional_block_ymm:
1307; CHECK:       # %bb.0: # %entry
1308; CHECK-NEXT:    cmpl $18, %edx
1309; CHECK-NEXT:    jl .LBB12_2
1310; CHECK-NEXT:  # %bb.1: # %if.then
1311; CHECK-NEXT:    movq $1, 8(%rdi)
1312; CHECK-NEXT:  .LBB12_2: # %if.end
1313; CHECK-NEXT:    movups (%r8), %xmm0
1314; CHECK-NEXT:    movups 16(%r8), %xmm1
1315; CHECK-NEXT:    movups %xmm1, 16(%rcx)
1316; CHECK-NEXT:    movups %xmm0, (%rcx)
1317; CHECK-NEXT:    movq (%rdi), %rax
1318; CHECK-NEXT:    movq 8(%rdi), %rcx
1319; CHECK-NEXT:    movups 16(%rdi), %xmm0
1320; CHECK-NEXT:    movups %xmm0, 16(%rsi)
1321; CHECK-NEXT:    movq %rax, (%rsi)
1322; CHECK-NEXT:    movq %rcx, 8(%rsi)
1323; CHECK-NEXT:    retq
1324;
1325; DISABLED-LABEL: test_conditional_block_ymm:
1326; DISABLED:       # %bb.0: # %entry
1327; DISABLED-NEXT:    cmpl $18, %edx
1328; DISABLED-NEXT:    jl .LBB12_2
1329; DISABLED-NEXT:  # %bb.1: # %if.then
1330; DISABLED-NEXT:    movq $1, 8(%rdi)
1331; DISABLED-NEXT:  .LBB12_2: # %if.end
1332; DISABLED-NEXT:    movups (%r8), %xmm0
1333; DISABLED-NEXT:    movups 16(%r8), %xmm1
1334; DISABLED-NEXT:    movups %xmm1, 16(%rcx)
1335; DISABLED-NEXT:    movups %xmm0, (%rcx)
1336; DISABLED-NEXT:    movups (%rdi), %xmm0
1337; DISABLED-NEXT:    movups 16(%rdi), %xmm1
1338; DISABLED-NEXT:    movups %xmm1, 16(%rsi)
1339; DISABLED-NEXT:    movups %xmm0, (%rsi)
1340; DISABLED-NEXT:    retq
1341;
1342; CHECK-AVX2-LABEL: test_conditional_block_ymm:
1343; CHECK-AVX2:       # %bb.0: # %entry
1344; CHECK-AVX2-NEXT:    cmpl $18, %edx
1345; CHECK-AVX2-NEXT:    jl .LBB12_2
1346; CHECK-AVX2-NEXT:  # %bb.1: # %if.then
1347; CHECK-AVX2-NEXT:    movq $1, 8(%rdi)
1348; CHECK-AVX2-NEXT:  .LBB12_2: # %if.end
1349; CHECK-AVX2-NEXT:    vmovups (%r8), %ymm0
1350; CHECK-AVX2-NEXT:    vmovups %ymm0, (%rcx)
1351; CHECK-AVX2-NEXT:    movq (%rdi), %rax
1352; CHECK-AVX2-NEXT:    movq %rax, (%rsi)
1353; CHECK-AVX2-NEXT:    movq 8(%rdi), %rax
1354; CHECK-AVX2-NEXT:    movq %rax, 8(%rsi)
1355; CHECK-AVX2-NEXT:    vmovups 16(%rdi), %xmm0
1356; CHECK-AVX2-NEXT:    vmovups %xmm0, 16(%rsi)
1357; CHECK-AVX2-NEXT:    vzeroupper
1358; CHECK-AVX2-NEXT:    retq
1359;
1360; CHECK-AVX512-LABEL: test_conditional_block_ymm:
1361; CHECK-AVX512:       # %bb.0: # %entry
1362; CHECK-AVX512-NEXT:    cmpl $18, %edx
1363; CHECK-AVX512-NEXT:    jl .LBB12_2
1364; CHECK-AVX512-NEXT:  # %bb.1: # %if.then
1365; CHECK-AVX512-NEXT:    movq $1, 8(%rdi)
1366; CHECK-AVX512-NEXT:  .LBB12_2: # %if.end
1367; CHECK-AVX512-NEXT:    vmovups (%r8), %ymm0
1368; CHECK-AVX512-NEXT:    vmovups %ymm0, (%rcx)
1369; CHECK-AVX512-NEXT:    movq (%rdi), %rax
1370; CHECK-AVX512-NEXT:    movq %rax, (%rsi)
1371; CHECK-AVX512-NEXT:    movq 8(%rdi), %rax
1372; CHECK-AVX512-NEXT:    movq %rax, 8(%rsi)
1373; CHECK-AVX512-NEXT:    vmovups 16(%rdi), %xmm0
1374; CHECK-AVX512-NEXT:    vmovups %xmm0, 16(%rsi)
1375; CHECK-AVX512-NEXT:    vzeroupper
1376; CHECK-AVX512-NEXT:    retq
1377entry:
1378  %cmp = icmp sgt i32 %x, 17
1379  br i1 %cmp, label %if.then, label %if.end
1380
1381if.then:                                          ; preds = %entry
1382  %b = getelementptr inbounds %struct.S8, ptr %s1, i64 0, i32 1
1383  store i64 1, ptr %b, align 4
1384  br label %if.end
1385
1386if.end:                                           ; preds = %if.then, %entry
1387  tail call void @llvm.memcpy.p0.p0.i64(ptr %s3, ptr %s4, i64 32, i32 4, i1 false)
1388  tail call void @llvm.memcpy.p0.p0.i64(ptr %s2, ptr %s1, i64 32, i32 4, i1 false)
1389  ret void
1390}
1391
; test_alias: stores %x to the first 4 bytes of %A, then copies 16 bytes from
; %A to %A+4, so the source range (bytes 0-15) and the destination range
; (bytes 4-19) overlap.
; All four run configurations expect the copy to stay one 16-byte (v)movups
; load/store pair; the output is the same with and without
; --x86-disable-avoid-SFB.
1392define dso_local void @test_alias(ptr nocapture %A, i32 %x) local_unnamed_addr #0 {
1393; CHECK-LABEL: test_alias:
1394; CHECK:       # %bb.0: # %entry
1395; CHECK-NEXT:    movl %esi, (%rdi)
1396; CHECK-NEXT:    movups (%rdi), %xmm0
1397; CHECK-NEXT:    movups %xmm0, 4(%rdi)
1398; CHECK-NEXT:    retq
1399;
1400; DISABLED-LABEL: test_alias:
1401; DISABLED:       # %bb.0: # %entry
1402; DISABLED-NEXT:    movl %esi, (%rdi)
1403; DISABLED-NEXT:    movups (%rdi), %xmm0
1404; DISABLED-NEXT:    movups %xmm0, 4(%rdi)
1405; DISABLED-NEXT:    retq
1406;
1407; CHECK-AVX2-LABEL: test_alias:
1408; CHECK-AVX2:       # %bb.0: # %entry
1409; CHECK-AVX2-NEXT:    movl %esi, (%rdi)
1410; CHECK-AVX2-NEXT:    vmovups (%rdi), %xmm0
1411; CHECK-AVX2-NEXT:    vmovups %xmm0, 4(%rdi)
1412; CHECK-AVX2-NEXT:    retq
1413;
1414; CHECK-AVX512-LABEL: test_alias:
1415; CHECK-AVX512:       # %bb.0: # %entry
1416; CHECK-AVX512-NEXT:    movl %esi, (%rdi)
1417; CHECK-AVX512-NEXT:    vmovups (%rdi), %xmm0
1418; CHECK-AVX512-NEXT:    vmovups %xmm0, 4(%rdi)
1419; CHECK-AVX512-NEXT:    retq
1420entry:
1421  store i32 %x, ptr %A, align 4
1422  %add.ptr = getelementptr inbounds i8, ptr %A, i64 4
1423  tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 4 %add.ptr, ptr align 4 %A, i64 16, i32 4, i1 false)
1424  ret void
1425}
1426
; test_noalias: stores %x to the first 4 bytes of %A, then copies 16 bytes
; from %A to %A+20, so the destination (bytes 20-35) starts past the end of
; the source range (bytes 0-15).
; Expected assembly for the copy:
;   - x86-64, core-avx2 and skx runs: a 4-byte move 0 -> 20, an 8-byte move
;     4 -> 24, and a 4-byte move 12 -> 32.
;   - run with --x86-disable-avoid-SFB: one 16-byte movups load/store pair.
1427; Function Attrs: nounwind uwtable
1428define dso_local void @test_noalias(ptr nocapture %A, i32 %x) local_unnamed_addr #0 {
1429; CHECK-LABEL: test_noalias:
1430; CHECK:       # %bb.0: # %entry
1431; CHECK-NEXT:    movl %esi, (%rdi)
1432; CHECK-NEXT:    movl (%rdi), %eax
1433; CHECK-NEXT:    movl %eax, 20(%rdi)
1434; CHECK-NEXT:    movq 4(%rdi), %rax
1435; CHECK-NEXT:    movq %rax, 24(%rdi)
1436; CHECK-NEXT:    movl 12(%rdi), %eax
1437; CHECK-NEXT:    movl %eax, 32(%rdi)
1438; CHECK-NEXT:    retq
1439;
1440; DISABLED-LABEL: test_noalias:
1441; DISABLED:       # %bb.0: # %entry
1442; DISABLED-NEXT:    movl %esi, (%rdi)
1443; DISABLED-NEXT:    movups (%rdi), %xmm0
1444; DISABLED-NEXT:    movups %xmm0, 20(%rdi)
1445; DISABLED-NEXT:    retq
1446;
1447; CHECK-AVX2-LABEL: test_noalias:
1448; CHECK-AVX2:       # %bb.0: # %entry
1449; CHECK-AVX2-NEXT:    movl %esi, (%rdi)
1450; CHECK-AVX2-NEXT:    movl (%rdi), %eax
1451; CHECK-AVX2-NEXT:    movl %eax, 20(%rdi)
1452; CHECK-AVX2-NEXT:    movq 4(%rdi), %rax
1453; CHECK-AVX2-NEXT:    movq %rax, 24(%rdi)
1454; CHECK-AVX2-NEXT:    movl 12(%rdi), %eax
1455; CHECK-AVX2-NEXT:    movl %eax, 32(%rdi)
1456; CHECK-AVX2-NEXT:    retq
1457;
1458; CHECK-AVX512-LABEL: test_noalias:
1459; CHECK-AVX512:       # %bb.0: # %entry
1460; CHECK-AVX512-NEXT:    movl %esi, (%rdi)
1461; CHECK-AVX512-NEXT:    movl (%rdi), %eax
1462; CHECK-AVX512-NEXT:    movl %eax, 20(%rdi)
1463; CHECK-AVX512-NEXT:    movq 4(%rdi), %rax
1464; CHECK-AVX512-NEXT:    movq %rax, 24(%rdi)
1465; CHECK-AVX512-NEXT:    movl 12(%rdi), %eax
1466; CHECK-AVX512-NEXT:    movl %eax, 32(%rdi)
1467; CHECK-AVX512-NEXT:    retq
1468entry:
1469  store i32 %x, ptr %A, align 4
1470  %add.ptr = getelementptr inbounds i8, ptr %A, i64 20
1471  tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 4 %add.ptr, ptr align 4 %A, i64 16, i32 4, i1 false)
1472  ret void
1473}
1474
; Declared with five operands: destination, source, i64 byte count, an i32,
; and an i1. The calls visible in this part of the file pass i32 4 and
; i1 false for the last two.
; NOTE(review): the i32 operand is presumably the legacy explicit-alignment
; argument that the IR reader auto-upgrades -- confirm against the LangRef.
1475; Function Attrs: argmemonly nounwind
1476declare void @llvm.memcpy.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i32, i1) #1
1477
; Attribute group #0 is attached to every function defined in this part of
; the file.
; NOTE(review): groups #1 (on the declarations) and #3 (on the calls to @bar)
; are referenced but no definition for them is visible here -- confirm
; whether that is intentional.
1478attributes #0 = { nounwind uwtable }
1479