; RUN: opt -mtriple=amdgcn-amd-amdhsa -basicaa -load-store-vectorizer -S -o - %s | FileCheck %s

target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"

; Check position of the inserted vector load/store.  Vectorized loads should be
; inserted at the position of the first load in the chain, and stores should be
; inserted at the position of the last store.

; Two adjacent float loads from %c are separated by an unrelated add (%w).
; The combined <2 x float> load is expected at the position of the first
; scalar load, i.e. after %z and before %w.
;
; CHECK-LABEL: @insert_load_point(
; CHECK: %z = add i32 %x, 4
; CHECK: load <2 x float>
; CHECK: %w = add i32 %y, 9
; CHECK: %foo = add i32 %z, %w
define void @insert_load_point(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c, i64 %idx, i32 %x, i32 %y) #0 {
entry:
  ; Addresses of elements idx and idx + 1 in %a and %c.
  %a.idx.x = getelementptr inbounds float, float addrspace(1)* %a, i64 %idx
  %c.idx.x = getelementptr inbounds float, float addrspace(1)* %c, i64 %idx
  %a.idx.x.1 = getelementptr inbounds float, float addrspace(1)* %a.idx.x, i64 1
  %c.idx.x.1 = getelementptr inbounds float, float addrspace(1)* %c.idx.x, i64 1

  ; The integer adds are interleaved with the loads and act as position
  ; markers for the check lines above.
  %z = add i32 %x, 4
  %ld.c = load float, float addrspace(1)* %c.idx.x, align 4
  %w = add i32 %y, 9
  %ld.c.idx.1 = load float, float addrspace(1)* %c.idx.x.1, align 4
  %foo = add i32 %z, %w

  ; Back-to-back stores to a[idx] and a[idx + 1].
  store float 0.0, float addrspace(1)* %a.idx.x, align 4
  store float 0.0, float addrspace(1)* %a.idx.x.1, align 4

  ; Consume both loaded values and %foo through stores.
  %add = fadd float %ld.c, %ld.c.idx.1
  store float %add, float addrspace(1)* %b, align 4
  store i32 %foo, i32 addrspace(3)* null, align 4
  ret void
}

; Two adjacent float stores to %a are separated by an unrelated add (%w).
; The combined <2 x float> store is expected at the position of the last
; scalar store, i.e. after %w and before %foo.
;
; CHECK-LABEL: @insert_store_point(
; CHECK: %z = add i32 %x, 4
; CHECK: %w = add i32 %y, 9
; CHECK: store <2 x float>
; CHECK: %foo = add i32 %z, %w
define void @insert_store_point(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c, i64 %idx, i32 %x, i32 %y) #0 {
entry:
  ; Addresses of elements idx and idx + 1 in %a and %c.
  %a.idx.x = getelementptr inbounds float, float addrspace(1)* %a, i64 %idx
  %c.idx.x = getelementptr inbounds float, float addrspace(1)* %c, i64 %idx
  %a.idx.x.1 = getelementptr inbounds float, float addrspace(1)* %a.idx.x, i64 1
  %c.idx.x.1 = getelementptr inbounds float, float addrspace(1)* %c.idx.x, i64 1

  ; Back-to-back loads from c[idx] and c[idx + 1].
  %ld.c = load float, float addrspace(1)* %c.idx.x, align 4
  %ld.c.idx.1 = load float, float addrspace(1)* %c.idx.x.1, align 4

  ; The integer adds are interleaved with the stores and act as position
  ; markers for the check lines above.
  %z = add i32 %x, 4
  store float 0.0, float addrspace(1)* %a.idx.x, align 4
  %w = add i32 %y, 9
  store float 0.0, float addrspace(1)* %a.idx.x.1, align 4
  %foo = add i32 %z, %w

  ; Consume both loaded values and %foo through stores.
  %add = fadd float %ld.c, %ld.c.idx.1
  store float %add, float addrspace(1)* %b, align 4
  store i32 %foo, i32 addrspace(3)* null, align 4
  ret void
}

; Here we have four stores, with an aliasing load before the last one.  We can
; vectorize the first two stores as <2 x float>, but this vectorized store must
; be inserted at the location of the second scalar store, not the fourth one.
;
; The label below ends in '(' so that it can only match this function and not
; @insert_store_point_alias_ooo, which shares the same name prefix.
;
; CHECK-LABEL: @insert_store_point_alias(
; CHECK: store <2 x float>
; CHECK: store float
; CHECK-SAME: %a.idx.2
; CHECK: load float, float addrspace(1)* %a.idx.2
; CHECK: store float
; CHECK-SAME: %a.idx.3
define float @insert_store_point_alias(float addrspace(1)* nocapture %a, i64 %idx) {
  ; Addresses of a[idx] .. a[idx + 3], each derived from the previous one.
  %a.idx = getelementptr inbounds float, float addrspace(1)* %a, i64 %idx
  %a.idx.1 = getelementptr inbounds float, float addrspace(1)* %a.idx, i64 1
  %a.idx.2 = getelementptr inbounds float, float addrspace(1)* %a.idx.1, i64 1
  %a.idx.3 = getelementptr inbounds float, float addrspace(1)* %a.idx.2, i64 1

  ; Stores in ascending address order; the load reads back a[idx + 2] between
  ; the third and the fourth store.
  store float 0.0, float addrspace(1)* %a.idx, align 4
  store float 0.0, float addrspace(1)* %a.idx.1, align 4
  store float 0.0, float addrspace(1)* %a.idx.2, align 4
  %x = load float, float addrspace(1)* %a.idx.2, align 4
  store float 0.0, float addrspace(1)* %a.idx.3, align 4

  ret float %x
}

; Here we have four stores, with an aliasing load before the last one.  We
; could vectorize two of the stores before the load (although we currently
; don't), but the important thing is that we *don't* sink the store to
; a[idx + 1] below the load.
;
; The final address check includes the trailing ',' so that it matches only
; %a.idx itself and not %a.idx.1, %a.idx.2 or %a.idx.3.
;
; CHECK-LABEL: @insert_store_point_alias_ooo(
; CHECK: store float
; CHECK-SAME: %a.idx.3
; CHECK: store float
; CHECK-SAME: %a.idx.1
; CHECK: store float
; CHECK-SAME: %a.idx.2
; CHECK: load float, float addrspace(1)* %a.idx.2
; CHECK: store float
; CHECK-SAME: %a.idx,
define float @insert_store_point_alias_ooo(float addrspace(1)* nocapture %a, i64 %idx) {
  ; Addresses of a[idx] .. a[idx + 3], each derived from the previous one.
  %a.idx = getelementptr inbounds float, float addrspace(1)* %a, i64 %idx
  %a.idx.1 = getelementptr inbounds float, float addrspace(1)* %a.idx, i64 1
  %a.idx.2 = getelementptr inbounds float, float addrspace(1)* %a.idx.1, i64 1
  %a.idx.3 = getelementptr inbounds float, float addrspace(1)* %a.idx.2, i64 1

  ; Stores issued out of address order (idx + 3, idx + 1, idx + 2, then idx);
  ; the load reads back a[idx + 2] before the final store to a[idx].
  store float 0.0, float addrspace(1)* %a.idx.3, align 4
  store float 0.0, float addrspace(1)* %a.idx.1, align 4
  store float 0.0, float addrspace(1)* %a.idx.2, align 4
  %x = load float, float addrspace(1)* %a.idx.2, align 4
  store float 0.0, float addrspace(1)* %a.idx, align 4

  ret float %x
}

; Attribute group referenced as #0 by @insert_load_point and
; @insert_store_point.
attributes #0 = { nounwind }
