; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer -slp-vectorize-hor -slp-vectorize-hor-store -S < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=bdver2 | FileCheck %s

; Two adjacent i64 values are loaded from %ld (elements 0 and 1) and written to
; four consecutive slots of %st in the repeating order t0, t1, t0, t1.
; The expected output is a single <2 x i64> load, one shufflevector with mask
; <0, 1, 0, 1>, and a single <4 x i64> store.
define void @i64_simplified(i64* noalias %st, i64* noalias %ld) {
; CHECK-LABEL: @i64_simplified(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[LD:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[ST:%.*]] to <4 x i64>*
; CHECK-NEXT:    store <4 x i64> [[SHUFFLE]], <4 x i64>* [[TMP3]], align 8
; CHECK-NEXT:    ret void
;
  ; Address of the second source element, ld[1].
  %arrayidx1 = getelementptr inbounds i64, i64* %ld, i64 1

  ; Scalar loads of ld[0] and ld[1].
  %t0 = load i64, i64* %ld, align 8
  %t1 = load i64, i64* %arrayidx1, align 8

  ; Addresses of st[1], st[2] and st[3]; st[0] is %st itself.
  %arrayidx3 = getelementptr inbounds i64, i64* %st, i64 1
  %arrayidx4 = getelementptr inbounds i64, i64* %st, i64 2
  %arrayidx5 = getelementptr inbounds i64, i64* %st, i64 3

  ; Store pattern over st[0..3]: t0, t1, t0, t1.
  store i64 %t0, i64* %st, align 8
  store i64 %t1, i64* %arrayidx3, align 8
  store i64 %t0, i64* %arrayidx4, align 8
  store i64 %t1, i64* %arrayidx5, align 8
  ret void
}

; Same two loads as a forward pair from %ld (elements 0 and 1), but the four
; consecutive slots of %st receive them in swapped order: t1, t0, t1, t0.
; The expected output is a single <2 x i64> load, one shufflevector with mask
; <1, 0, 1, 0>, and a single <4 x i64> store.
define void @i64_simplifiedi_reversed(i64* noalias %st, i64* noalias %ld) {
; CHECK-LABEL: @i64_simplifiedi_reversed(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[LD:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[ST:%.*]] to <4 x i64>*
; CHECK-NEXT:    store <4 x i64> [[SHUFFLE]], <4 x i64>* [[TMP3]], align 8
; CHECK-NEXT:    ret void
;
  ; Address of the second source element, ld[1].
  %arrayidx1 = getelementptr inbounds i64, i64* %ld, i64 1

  ; Scalar loads of ld[0] and ld[1].
  %t0 = load i64, i64* %ld, align 8
  %t1 = load i64, i64* %arrayidx1, align 8

  ; Addresses of st[1], st[2] and st[3]; st[0] is %st itself.
  %arrayidx3 = getelementptr inbounds i64, i64* %st, i64 1
  %arrayidx4 = getelementptr inbounds i64, i64* %st, i64 2
  %arrayidx5 = getelementptr inbounds i64, i64* %st, i64 3

  ; Store pattern over st[0..3]: t1, t0, t1, t0.
  store i64 %t1, i64* %st, align 8
  store i64 %t0, i64* %arrayidx3, align 8
  store i64 %t1, i64* %arrayidx4, align 8
  store i64 %t0, i64* %arrayidx5, align 8
  ret void
}

; Two adjacent i64 values are loaded from %ld (elements 0 and 1). The four
; consecutive slots of %st receive t0, t0, t0, t1, and t1 is additionally
; stored back to ld[0] as a fifth, scalar store.
; The expected output is a single <2 x i64> load, one shufflevector with mask
; <0, 0, 0, 1>, a single <4 x i64> store, and an extractelement of lane 3 of
; the shuffle feeding the remaining scalar store to %ld.
define void @i64_simplifiedi_extract(i64* noalias %st, i64* noalias %ld) {
; CHECK-LABEL: @i64_simplifiedi_extract(
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64* [[LD:%.*]] to <2 x i64>*
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[ST:%.*]] to <4 x i64>*
; CHECK-NEXT:    store <4 x i64> [[SHUFFLE]], <4 x i64>* [[TMP3]], align 8
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i64> [[SHUFFLE]], i32 3
; CHECK-NEXT:    store i64 [[TMP4]], i64* [[LD]], align 8
; CHECK-NEXT:    ret void
;
  ; Address of the second source element, ld[1].
  %arrayidx1 = getelementptr inbounds i64, i64* %ld, i64 1

  ; Scalar loads of ld[0] and ld[1].
  %t0 = load i64, i64* %ld, align 8
  %t1 = load i64, i64* %arrayidx1, align 8

  ; Addresses of st[1], st[2] and st[3]; st[0] is %st itself.
  %arrayidx3 = getelementptr inbounds i64, i64* %st, i64 1
  %arrayidx4 = getelementptr inbounds i64, i64* %st, i64 2
  %arrayidx5 = getelementptr inbounds i64, i64* %st, i64 3

  ; Store pattern over st[0..3]: t0, t0, t0, t1.
  store i64 %t0, i64* %st, align 8
  store i64 %t0, i64* %arrayidx3, align 8
  store i64 %t0, i64* %arrayidx4, align 8
  store i64 %t1, i64* %arrayidx5, align 8
  ; Extra scalar use of t1 outside the four-wide store group: written to ld[0].
  store i64 %t1, i64* %ld, align 8
  ret void
}

82