; RUN: opt -mtriple=amdgcn-amd-amdhsa -basic-aa -load-store-vectorizer -S -o - %s | FileCheck %s
; RUN: opt -mtriple=amdgcn-amd-amdhsa -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -S -o - %s | FileCheck %s

; Data layout used together with the amdgcn-amd-amdhsa triple given on the
; command lines above.
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"

; Check the position of the inserted vector load/store.  Vectorized loads should
; be inserted at the position of the first load in the chain, and stores should
; be inserted at the position of the last store.

; Two adjacent float loads from %c are interleaved with integer adds.  The
; checks expect a single <2 x float> load that sits between %z and %w, i.e. at
; the position of the first scalar load.
;
; CHECK-LABEL: @insert_load_point(
; CHECK: %z = add i32 %x, 4
; CHECK: load <2 x float>
; CHECK: %w = add i32 %y, 9
; CHECK: %foo = add i32 %z, %w
define amdgpu_kernel void @insert_load_point(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c, i64 %idx, i32 %x, i32 %y) #0 {
entry:
  ; Addresses of a[idx], c[idx], a[idx + 1] and c[idx + 1].
  %a.idx.x = getelementptr inbounds float, float addrspace(1)* %a, i64 %idx
  %c.idx.x = getelementptr inbounds float, float addrspace(1)* %c, i64 %idx
  %a.idx.x.1 = getelementptr inbounds float, float addrspace(1)* %a.idx.x, i64 1
  %c.idx.x.1 = getelementptr inbounds float, float addrspace(1)* %c.idx.x, i64 1

  ; The adds %z, %w and %foo are position markers: the checks above match them
  ; in order around the vector load.
  %z = add i32 %x, 4
  %ld.c = load float, float addrspace(1)* %c.idx.x, align 4
  %w = add i32 %y, 9
  %ld.c.idx.1 = load float, float addrspace(1)* %c.idx.x.1, align 4
  %foo = add i32 %z, %w

  store float 0.0, float addrspace(1)* %a.idx.x, align 4
  store float 0.0, float addrspace(1)* %a.idx.x.1, align 4

  ; Give both loaded values and %foo a use (presumably so they are not
  ; trivially dead).
  %add = fadd float %ld.c, %ld.c.idx.1
  store float %add, float addrspace(1)* %b, align 4
  store i32 %foo, i32 addrspace(3)* null, align 4
  ret void
}

; Two adjacent float stores to %a are interleaved with integer adds.  The
; checks expect a single <2 x float> store that sits between %w and %foo, i.e.
; at the position of the last scalar store.
;
; CHECK-LABEL: @insert_store_point(
; CHECK: %z = add i32 %x, 4
; CHECK: %w = add i32 %y, 9
; CHECK: store <2 x float>
; CHECK: %foo = add i32 %z, %w
define amdgpu_kernel void @insert_store_point(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c, i64 %idx, i32 %x, i32 %y) #0 {
entry:
  ; Addresses of a[idx], c[idx], a[idx + 1] and c[idx + 1].
  %a.idx.x = getelementptr inbounds float, float addrspace(1)* %a, i64 %idx
  %c.idx.x = getelementptr inbounds float, float addrspace(1)* %c, i64 %idx
  %a.idx.x.1 = getelementptr inbounds float, float addrspace(1)* %a.idx.x, i64 1
  %c.idx.x.1 = getelementptr inbounds float, float addrspace(1)* %c.idx.x, i64 1

  %ld.c = load float, float addrspace(1)* %c.idx.x, align 4
  %ld.c.idx.1 = load float, float addrspace(1)* %c.idx.x.1, align 4

  ; The adds %z, %w and %foo are position markers: the checks above match them
  ; in order around the vector store.
  %z = add i32 %x, 4
  store float 0.0, float addrspace(1)* %a.idx.x, align 4
  %w = add i32 %y, 9
  store float 0.0, float addrspace(1)* %a.idx.x.1, align 4
  %foo = add i32 %z, %w

  ; Give both loaded values and %foo a use (presumably so they are not
  ; trivially dead).
  %add = fadd float %ld.c, %ld.c.idx.1
  store float %add, float addrspace(1)* %b, align 4
  store i32 %foo, i32 addrspace(3)* null, align 4
  ret void
}

; Here we have four stores, with an aliasing load before the last one.  We can
; vectorize the first three stores as <3 x float>, but this vectorized store must
; be inserted at the location of the third scalar store, not the fourth one.
;
; The label pattern ends in '(' so that it cannot also match the longer name
; @insert_store_point_alias_ooo.
;
; CHECK-LABEL: @insert_store_point_alias(
; CHECK: store <3 x float>
; CHECK: load float, float addrspace(1)* %a.idx.2
; CHECK: store float
; CHECK-SAME: %a.idx.3
define float @insert_store_point_alias(float addrspace(1)* nocapture %a, i64 %idx) {
  ; Addresses of a[idx] through a[idx + 3], each derived from the previous one.
  %a.idx = getelementptr inbounds float, float addrspace(1)* %a, i64 %idx
  %a.idx.1 = getelementptr inbounds float, float addrspace(1)* %a.idx, i64 1
  %a.idx.2 = getelementptr inbounds float, float addrspace(1)* %a.idx.1, i64 1
  %a.idx.3 = getelementptr inbounds float, float addrspace(1)* %a.idx.2, i64 1

  store float 0.0, float addrspace(1)* %a.idx, align 4
  store float 0.0, float addrspace(1)* %a.idx.1, align 4
  store float 0.0, float addrspace(1)* %a.idx.2, align 4
  ; This load reads the address written by the store directly above it.
  %x = load float, float addrspace(1)* %a.idx.2, align 4
  store float 0.0, float addrspace(1)* %a.idx.3, align 4

  ret float %x
}

; Here we have four stores, with an aliasing load before the last one.  We
; could vectorize two of the stores before the load (although we currently
; don't), but the important thing is that we *don't* sink the store to
; a[idx + 1] below the load.
;
; The last store pattern includes the trailing comma so that it matches only
; %a.idx itself and not %a.idx.1, %a.idx.2 or %a.idx.3.
;
; CHECK-LABEL: @insert_store_point_alias_ooo(
; CHECK: store float
; CHECK-SAME: %a.idx.3
; CHECK: store float
; CHECK-SAME: %a.idx.1
; CHECK: store float
; CHECK-SAME: %a.idx.2
; CHECK: load float, float addrspace(1)* %a.idx.2
; CHECK: store float
; CHECK-SAME: %a.idx,
define float @insert_store_point_alias_ooo(float addrspace(1)* nocapture %a, i64 %idx) {
  ; Addresses of a[idx] through a[idx + 3], each derived from the previous one.
  %a.idx = getelementptr inbounds float, float addrspace(1)* %a, i64 %idx
  %a.idx.1 = getelementptr inbounds float, float addrspace(1)* %a.idx, i64 1
  %a.idx.2 = getelementptr inbounds float, float addrspace(1)* %a.idx.1, i64 1
  %a.idx.3 = getelementptr inbounds float, float addrspace(1)* %a.idx.2, i64 1

  ; The stores are issued out of address order: a[idx + 3], a[idx + 1],
  ; a[idx + 2], then (after the load) a[idx].
  store float 0.0, float addrspace(1)* %a.idx.3, align 4
  store float 0.0, float addrspace(1)* %a.idx.1, align 4
  store float 0.0, float addrspace(1)* %a.idx.2, align 4
  ; This load reads the address written by the store directly above it.
  %x = load float, float addrspace(1)* %a.idx.2, align 4
  store float 0.0, float addrspace(1)* %a.idx, align 4

  ret float %x
}

; Attribute group referenced as #0 by @insert_load_point and
; @insert_store_point.
attributes #0 = { nounwind }