1; RUN: opt -data-layout=A5 -S -load-store-vectorizer -mattr=-unaligned-buffer-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=ALIGNED -check-prefix=ALL %s
2; RUN: opt -data-layout=A5 -S -load-store-vectorizer -mattr=+unaligned-buffer-access,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefix=UNALIGNED -check-prefix=ALL %s
3
; Target triple for this module. The data layout (-data-layout=A5) and the
; subtarget features being tested are supplied on the opt command lines in the
; RUN lines above rather than in the module itself.
4target triple = "amdgcn--"
5
6; ALL-LABEL: @load_unknown_offset_align1_i8(
7; ALL: alloca [128 x i8], align 1
8; UNALIGNED: load <2 x i8>, <2 x i8> addrspace(5)* %{{[0-9]+}}, align 1{{$}}
9
10; ALIGNED: load i8, i8 addrspace(5)* %ptr0, align 1{{$}}
11; ALIGNED: load i8, i8 addrspace(5)* %ptr1, align 1{{$}}
; Test kernel: two adjacent i8 loads, both marked align 1, from an align-1
; addrspace(5) alloca indexed by the runtime argument %offset. The checks above
; expect a single <2 x i8> load in the UNALIGNED run and the two original
; scalar loads in the ALIGNED run.
12define amdgpu_kernel void @load_unknown_offset_align1_i8(i8 addrspace(1)* noalias %out, i32 %offset) #0 {
13  %alloca = alloca [128 x i8], align 1, addrspace(5)
  ; %ptr0 addresses element %offset of the array; the index is a kernel
  ; argument, so it is not a compile-time constant.
14  %ptr0 = getelementptr inbounds [128 x i8], [128 x i8] addrspace(5)* %alloca, i32 0, i32 %offset
15  %val0 = load i8, i8 addrspace(5)* %ptr0, align 1
  ; %ptr1 addresses the element immediately after %ptr0.
16  %ptr1 = getelementptr inbounds i8, i8 addrspace(5)* %ptr0, i32 1
17  %val1 = load i8, i8 addrspace(5)* %ptr1, align 1
  ; Combine both loaded values and store the sum through %out so that both
  ; loads have a use.
18  %add = add i8 %val0, %val1
19  store i8 %add, i8 addrspace(1)* %out
20  ret void
21}
22
23; ALL-LABEL: @load_unknown_offset_align1_i16(
24; ALL: alloca [128 x i16], align 1, addrspace(5){{$}}
25; UNALIGNED: load <2 x i16>, <2 x i16> addrspace(5)* %{{[0-9]+}}, align 1{{$}}
26
27; ALIGNED: load i16, i16 addrspace(5)* %ptr0, align 1{{$}}
28; ALIGNED: load i16, i16 addrspace(5)* %ptr1, align 1{{$}}
; Test kernel: same shape as the i8 load test, but with i16 elements. Both
; loads are marked align 1, which is below the element size. The checks above
; expect a single <2 x i16> load (still align 1) in the UNALIGNED run and the
; two original scalar loads in the ALIGNED run.
29define amdgpu_kernel void @load_unknown_offset_align1_i16(i16 addrspace(1)* noalias %out, i32 %offset) #0 {
30  %alloca = alloca [128 x i16], align 1, addrspace(5)
  ; %ptr0 addresses element %offset of the array; the index is a kernel
  ; argument, so it is not a compile-time constant.
31  %ptr0 = getelementptr inbounds [128 x i16], [128 x i16] addrspace(5)* %alloca, i32 0, i32 %offset
32  %val0 = load i16, i16 addrspace(5)* %ptr0, align 1
  ; %ptr1 addresses the element immediately after %ptr0.
33  %ptr1 = getelementptr inbounds i16, i16 addrspace(5)* %ptr0, i32 1
34  %val1 = load i16, i16 addrspace(5)* %ptr1, align 1
  ; Combine both loaded values and store the sum through %out so that both
  ; loads have a use.
35  %add = add i16 %val0, %val1
36  store i16 %add, i16 addrspace(1)* %out
37  ret void
38}
39
40; FIXME: Although the offset is unknown here, we know it is a multiple
41; of the element size, so it should still be align 4.
42
43; ALL-LABEL: @load_unknown_offset_align1_i32(
44; ALL: alloca [128 x i32], align 1
45; UNALIGNED: load <2 x i32>, <2 x i32> addrspace(5)* %{{[0-9]+}}, align 1{{$}}
46
47; ALIGNED: load i32, i32 addrspace(5)* %ptr0, align 1
48; ALIGNED: load i32, i32 addrspace(5)* %ptr1, align 1
; Test kernel: two adjacent i32 loads, both marked align 1, from an align-1
; addrspace(5) alloca indexed by the runtime argument %offset. The checks above
; expect a single <2 x i32> load with align 1 in the UNALIGNED run and the two
; original align-1 scalar loads in the ALIGNED run.
49define amdgpu_kernel void @load_unknown_offset_align1_i32(i32 addrspace(1)* noalias %out, i32 %offset) #0 {
50  %alloca = alloca [128 x i32], align 1, addrspace(5)
  ; %ptr0 addresses element %offset of the array; the index is a kernel
  ; argument, so it is not a compile-time constant.
51  %ptr0 = getelementptr inbounds [128 x i32], [128 x i32] addrspace(5)* %alloca, i32 0, i32 %offset
52  %val0 = load i32, i32 addrspace(5)* %ptr0, align 1
  ; %ptr1 addresses the element immediately after %ptr0.
53  %ptr1 = getelementptr inbounds i32, i32 addrspace(5)* %ptr0, i32 1
54  %val1 = load i32, i32 addrspace(5)* %ptr1, align 1
  ; Combine both loaded values and store the sum through %out so that both
  ; loads have a use.
55  %add = add i32 %val0, %val1
56  store i32 %add, i32 addrspace(1)* %out
57  ret void
58}
59
60; FIXME: The alignment of the load should always be increased.
61; Also make sure the alloca alignment isn't decreased.
62; ALL-LABEL: @load_alloca16_unknown_offset_align1_i32(
63; ALL: alloca [128 x i32], align 16
64
65; UNALIGNED: load <2 x i32>, <2 x i32> addrspace(5)* %{{[0-9]+}}, align 1{{$}}
66
67; FIXME: Should change alignment
68; ALIGNED: load i32
69; ALIGNED: load i32
; Test kernel: variant of the i32 load test in which the alloca itself is
; align 16 while both loads are still marked align 1. The checks above require
; the alloca to keep align 16, expect a single <2 x i32> load with align 1 in
; the UNALIGNED run, and two scalar i32 loads in the ALIGNED run.
70define amdgpu_kernel void @load_alloca16_unknown_offset_align1_i32(i32 addrspace(1)* noalias %out, i32 %offset) #0 {
  ; Unlike the other kernels in this file, the alloca here is align 16.
71  %alloca = alloca [128 x i32], align 16, addrspace(5)
  ; %ptr0 addresses element %offset of the array; the index is a kernel
  ; argument, so it is not a compile-time constant.
72  %ptr0 = getelementptr inbounds [128 x i32], [128 x i32] addrspace(5)* %alloca, i32 0, i32 %offset
73  %val0 = load i32, i32 addrspace(5)* %ptr0, align 1
  ; %ptr1 addresses the element immediately after %ptr0.
74  %ptr1 = getelementptr inbounds i32, i32 addrspace(5)* %ptr0, i32 1
75  %val1 = load i32, i32 addrspace(5)* %ptr1, align 1
  ; Combine both loaded values and store the sum through %out so that both
  ; loads have a use.
76  %add = add i32 %val0, %val1
77  store i32 %add, i32 addrspace(1)* %out
78  ret void
79}
80
81; ALL-LABEL: @store_unknown_offset_align1_i8(
82; ALL: alloca [128 x i8], align 1
83; UNALIGNED: store <2 x i8> <i8 9, i8 10>, <2 x i8> addrspace(5)* %{{[0-9]+}}, align 1{{$}}
84
85; ALIGNED: store i8 9, i8 addrspace(5)* %ptr0, align 1{{$}}
86; ALIGNED: store i8 10, i8 addrspace(5)* %ptr1, align 1{{$}}
; Test kernel: two adjacent i8 stores of the constants 9 and 10, both marked
; align 1, into an align-1 addrspace(5) alloca indexed by the runtime argument
; %offset. The checks above expect a single <2 x i8> store in the UNALIGNED run
; and the two original scalar stores in the ALIGNED run.
; Note: the %out parameter is not referenced in the body.
87define amdgpu_kernel void @store_unknown_offset_align1_i8(i8 addrspace(1)* noalias %out, i32 %offset) #0 {
88  %alloca = alloca [128 x i8], align 1, addrspace(5)
  ; %ptr0 addresses element %offset of the array; the index is a kernel
  ; argument, so it is not a compile-time constant.
89  %ptr0 = getelementptr inbounds [128 x i8], [128 x i8] addrspace(5)* %alloca, i32 0, i32 %offset
90  store i8 9, i8 addrspace(5)* %ptr0, align 1
  ; %ptr1 addresses the element immediately after %ptr0.
91  %ptr1 = getelementptr inbounds i8, i8 addrspace(5)* %ptr0, i32 1
92  store i8 10, i8 addrspace(5)* %ptr1, align 1
93  ret void
94}
95
96; ALL-LABEL: @store_unknown_offset_align1_i16(
97; ALL: alloca [128 x i16], align 1
98; UNALIGNED: store <2 x i16> <i16 9, i16 10>, <2 x i16> addrspace(5)* %{{[0-9]+}}, align 1{{$}}
99
100; ALIGNED: store i16 9, i16 addrspace(5)* %ptr0, align 1{{$}}
101; ALIGNED: store i16 10, i16 addrspace(5)* %ptr1, align 1{{$}}
; Test kernel: same shape as the i8 store test, but with i16 elements. Both
; stores are marked align 1, which is below the element size. The checks above
; expect a single <2 x i16> store (still align 1) in the UNALIGNED run and the
; two original scalar stores in the ALIGNED run.
; Note: the %out parameter is not referenced in the body.
102define amdgpu_kernel void @store_unknown_offset_align1_i16(i16 addrspace(1)* noalias %out, i32 %offset) #0 {
103  %alloca = alloca [128 x i16], align 1, addrspace(5)
  ; %ptr0 addresses element %offset of the array; the index is a kernel
  ; argument, so it is not a compile-time constant.
104  %ptr0 = getelementptr inbounds [128 x i16], [128 x i16] addrspace(5)* %alloca, i32 0, i32 %offset
105  store i16 9, i16 addrspace(5)* %ptr0, align 1
  ; %ptr1 addresses the element immediately after %ptr0.
106  %ptr1 = getelementptr inbounds i16, i16 addrspace(5)* %ptr0, i32 1
107  store i16 10, i16 addrspace(5)* %ptr1, align 1
108  ret void
109}
110
111; FIXME: Although the offset is unknown here, we know it is a multiple
112; of the element size, so it should still be align 4.
113
114; ALL-LABEL: @store_unknown_offset_align1_i32(
115; ALL: alloca [128 x i32], align 1
116
117; UNALIGNED: store <2 x i32> <i32 9, i32 10>, <2 x i32> addrspace(5)* %{{[0-9]+}}, align 1{{$}}
118
119; ALIGNED: store i32 9, i32 addrspace(5)* %ptr0, align 1
120; ALIGNED: store i32 10, i32 addrspace(5)* %ptr1, align 1
; Test kernel: two adjacent i32 stores of the constants 9 and 10, both marked
; align 1, into an align-1 addrspace(5) alloca indexed by the runtime argument
; %offset. The checks above expect a single <2 x i32> store with align 1 in the
; UNALIGNED run and the two original align-1 scalar stores in the ALIGNED run.
; Note: the %out parameter is not referenced in the body.
121define amdgpu_kernel void @store_unknown_offset_align1_i32(i32 addrspace(1)* noalias %out, i32 %offset) #0 {
122  %alloca = alloca [128 x i32], align 1, addrspace(5)
  ; %ptr0 addresses element %offset of the array; the index is a kernel
  ; argument, so it is not a compile-time constant.
123  %ptr0 = getelementptr inbounds [128 x i32], [128 x i32] addrspace(5)* %alloca, i32 0, i32 %offset
124  store i32 9, i32 addrspace(5)* %ptr0, align 1
  ; %ptr1 addresses the element immediately after %ptr0.
125  %ptr1 = getelementptr inbounds i32, i32 addrspace(5)* %ptr0, i32 1
126  store i32 10, i32 addrspace(5)* %ptr1, align 1
127  ret void
128}
129
; Attribute group #0, referenced by each kernel definition above; it marks
; them nounwind.
130attributes #0 = { nounwind }
131
132