; RUN: opt -mtriple=amdgcn-amd-amdhsa -basic-aa -load-store-vectorizer -S -o - %s | FileCheck %s

; Exercises the load-store vectorizer on adjacent accesses that mix pointer,
; integer and floating-point element types.
; Per the datalayout below, addrspace(1) pointers are 64 bits wide (p1:64:64)
; and addrspace(3) pointers are 32 bits wide (p3:32:32).
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"

; Carries attribute group #1; none of the test functions visible here call it.
declare i32 @llvm.amdgcn.workitem.id.x() #1

; Two adjacent addrspace(1) pointer loads from %b and two adjacent null-pointer
; stores to %a. Expected output: one <2 x i64> load whose elements are turned
; back into pointers with inttoptr, and one <2 x i64> zeroinitializer store.
; CHECK-LABEL: @merge_v2p1i8(
; CHECK: load <2 x i64>
; CHECK: inttoptr i64 %{{[^ ]+}} to i8 addrspace(1)*
; CHECK: inttoptr i64 %{{[^ ]+}} to i8 addrspace(1)*
; CHECK: store <2 x i64> zeroinitializer
define amdgpu_kernel void @merge_v2p1i8(i8 addrspace(1)* addrspace(1)* nocapture %a, i8 addrspace(1)* addrspace(1)* nocapture readonly %b) #0 {
entry:
  ; Second pointer-sized slot of each buffer.
  %a.1 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a, i64 1
  %b.1 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %b, i64 1

  %ld.c = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %b, align 4
  %ld.c.idx.1 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %b.1, align 4

  store i8 addrspace(1)* null, i8 addrspace(1)* addrspace(1)* %a, align 4
  store i8 addrspace(1)* null, i8 addrspace(1)* addrspace(1)* %a.1, align 4

  ret void
}

; Same shape as the addrspace(1) pointer-pair test, but with 32-bit
; addrspace(3) pointers: two adjacent pointer loads from %b and two adjacent
; null-pointer stores to %a. Expected output: one <2 x i32> load whose elements
; are converted with inttoptr, and one <2 x i32> zeroinitializer store.
; CHECK-LABEL: @merge_v2p3i8(
; CHECK: load <2 x i32>
; CHECK: inttoptr i32 %{{[^ ]+}} to i8 addrspace(3)*
; CHECK: inttoptr i32 %{{[^ ]+}} to i8 addrspace(3)*
; CHECK: store <2 x i32> zeroinitializer
define amdgpu_kernel void @merge_v2p3i8(i8 addrspace(3)* addrspace(3)* nocapture %a, i8 addrspace(3)* addrspace(3)* nocapture readonly %b) #0 {
entry:
  ; Second pointer-sized slot of each buffer.
  %a.1 = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %a, i64 1
  %b.1 = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %b, i64 1

  %ld.c = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %b, align 4
  %ld.c.idx.1 = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %b.1, align 4

  store i8 addrspace(3)* null, i8 addrspace(3)* addrspace(3)* %a, align 4
  store i8 addrspace(3)* null, i8 addrspace(3)* addrspace(3)* %a.1, align 4

  ret void
}

; An i64 load at %a followed by an addrspace(1) pointer load from the next i64
; slot. Expected output: a single <2 x i64> load; element 1 is extracted and
; converted to the pointer with inttoptr.
; CHECK-LABEL: @merge_load_i64_ptr64(
; CHECK: load <2 x i64>
; CHECK: [[ELT1:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 1
; CHECK: inttoptr i64 [[ELT1]] to i8 addrspace(1)*
define amdgpu_kernel void @merge_load_i64_ptr64(i64 addrspace(1)* nocapture %a) #0 {
entry:
  ; View the second i64 slot as a slot holding an addrspace(1) pointer.
  %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1
  %a.1.cast = bitcast i64 addrspace(1)* %a.1 to i8 addrspace(1)* addrspace(1)*

  %ld.0 = load i64, i64 addrspace(1)* %a
  %ld.1 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a.1.cast

  ret void
}

; An addrspace(1) pointer load at %a followed by an i64 load from the next i64
; slot. Expected output: a single <2 x i64> load; element 0 is extracted and
; converted to the pointer with inttoptr.
; CHECK-LABEL: @merge_load_ptr64_i64(
; CHECK: load <2 x i64>
; CHECK: [[ELT0:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 0
; CHECK: inttoptr i64 [[ELT0]] to i8 addrspace(1)*
define amdgpu_kernel void @merge_load_ptr64_i64(i64 addrspace(1)* nocapture %a) #0 {
entry:
  ; View the first i64 slot as a slot holding an addrspace(1) pointer.
  %a.cast = bitcast i64 addrspace(1)* %a to i8 addrspace(1)* addrspace(1)*
  %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1

  %ld.0 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a.cast
  %ld.1 = load i64, i64 addrspace(1)* %a.1

  ret void
}

; An addrspace(1) pointer store at %a followed by an i64 store to the next i64
; slot. Expected output: %ptr0 is converted with ptrtoint, inserted as element 0
; of a <2 x i64> built from poison, and a single <2 x i64> store is emitted.
; CHECK-LABEL: @merge_store_ptr64_i64(
; CHECK: [[ELT0:%[^ ]+]] = ptrtoint i8 addrspace(1)* %ptr0 to i64
; CHECK: insertelement <2 x i64> poison, i64 [[ELT0]], i32 0
; CHECK: store <2 x i64>
define amdgpu_kernel void @merge_store_ptr64_i64(i64 addrspace(1)* nocapture %a, i8 addrspace(1)* %ptr0, i64 %val1) #0 {
entry:
  ; View the first i64 slot as a slot holding an addrspace(1) pointer.
  %a.cast = bitcast i64 addrspace(1)* %a to i8 addrspace(1)* addrspace(1)*
  %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1

  store i8 addrspace(1)* %ptr0, i8 addrspace(1)* addrspace(1)* %a.cast
  store i64 %val1, i64 addrspace(1)* %a.1

  ret void
}

; An i64 store at %a followed by an addrspace(1) pointer store to the next
; pointer slot. Expected output: %ptr1 is converted with ptrtoint, inserted as
; element 1 of a <2 x i64>, and a single <2 x i64> store is emitted.
; CHECK-LABEL: @merge_store_i64_ptr64(
; CHECK: [[ELT1:%[^ ]+]] = ptrtoint i8 addrspace(1)* %ptr1 to i64
; CHECK: insertelement <2 x i64> %{{[^ ]+}}, i64 [[ELT1]], i32 1
; CHECK: store <2 x i64>
define amdgpu_kernel void @merge_store_i64_ptr64(i8 addrspace(1)* addrspace(1)* nocapture %a, i64 %val0, i8 addrspace(1)* %ptr1) #0 {
entry:
  ; View the first pointer slot as an i64 slot.
  %a.1 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a, i64 1
  %a.cast = bitcast i8 addrspace(1)* addrspace(1)* %a to i64 addrspace(1)*

  store i64 %val0, i64 addrspace(1)* %a.cast
  store i8 addrspace(1)* %ptr1, i8 addrspace(1)* addrspace(1)* %a.1

  ret void
}

; An i32 load at %a followed by an addrspace(3) pointer load from the next i32
; slot. Expected output: a single <2 x i32> load; element 1 is extracted and
; converted to the pointer with inttoptr.
; CHECK-LABEL: @merge_load_i32_ptr32(
; CHECK: load <2 x i32>
; CHECK: [[ELT1:%[^ ]+]] = extractelement <2 x i32> %{{[^ ]+}}, i32 1
; CHECK: inttoptr i32 [[ELT1]] to i8 addrspace(3)*
define amdgpu_kernel void @merge_load_i32_ptr32(i32 addrspace(3)* nocapture %a) #0 {
entry:
  ; View the second i32 slot as a slot holding an addrspace(3) pointer.
  %a.1 = getelementptr inbounds i32, i32 addrspace(3)* %a, i32 1
  %a.1.cast = bitcast i32 addrspace(3)* %a.1 to i8 addrspace(3)* addrspace(3)*

  %ld.0 = load i32, i32 addrspace(3)* %a
  %ld.1 = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %a.1.cast

  ret void
}

; An addrspace(3) pointer load at %a followed by an i32 load from the next i32
; slot. Expected output: a single <2 x i32> load; element 0 is extracted and
; converted to the pointer with inttoptr.
; CHECK-LABEL: @merge_load_ptr32_i32(
; CHECK: load <2 x i32>
; CHECK: [[ELT0:%[^ ]+]] = extractelement <2 x i32> %{{[^ ]+}}, i32 0
; CHECK: inttoptr i32 [[ELT0]] to i8 addrspace(3)*
define amdgpu_kernel void @merge_load_ptr32_i32(i32 addrspace(3)* nocapture %a) #0 {
entry:
  ; View the first i32 slot as a slot holding an addrspace(3) pointer.
  %a.cast = bitcast i32 addrspace(3)* %a to i8 addrspace(3)* addrspace(3)*
  %a.1 = getelementptr inbounds i32, i32 addrspace(3)* %a, i32 1

  %ld.0 = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %a.cast
  %ld.1 = load i32, i32 addrspace(3)* %a.1

  ret void
}

; An addrspace(3) pointer store at %a followed by an i32 store to the next i32
; slot. Expected output: %ptr0 is converted with ptrtoint, inserted as element 0
; of a <2 x i32> built from poison, and a single <2 x i32> store is emitted.
; CHECK-LABEL: @merge_store_ptr32_i32(
; CHECK: [[ELT0:%[^ ]+]] = ptrtoint i8 addrspace(3)* %ptr0 to i32
; CHECK: insertelement <2 x i32> poison, i32 [[ELT0]], i32 0
; CHECK: store <2 x i32>
define amdgpu_kernel void @merge_store_ptr32_i32(i32 addrspace(3)* nocapture %a, i8 addrspace(3)* %ptr0, i32 %val1) #0 {
entry:
  ; View the first i32 slot as a slot holding an addrspace(3) pointer.
  %a.cast = bitcast i32 addrspace(3)* %a to i8 addrspace(3)* addrspace(3)*
  %a.1 = getelementptr inbounds i32, i32 addrspace(3)* %a, i32 1

  store i8 addrspace(3)* %ptr0, i8 addrspace(3)* addrspace(3)* %a.cast
  store i32 %val1, i32 addrspace(3)* %a.1

  ret void
}

; An i32 store at %a followed by an addrspace(3) pointer store to the next
; pointer slot. Expected output: %ptr1 is converted with ptrtoint, inserted as
; element 1 of a <2 x i32>, and a single <2 x i32> store is emitted.
; CHECK-LABEL: @merge_store_i32_ptr32(
; CHECK: [[ELT1:%[^ ]+]] = ptrtoint i8 addrspace(3)* %ptr1 to i32
; CHECK: insertelement <2 x i32> %{{[^ ]+}}, i32 [[ELT1]], i32 1
; CHECK: store <2 x i32>
define amdgpu_kernel void @merge_store_i32_ptr32(i8 addrspace(3)* addrspace(3)* nocapture %a, i32 %val0, i8 addrspace(3)* %ptr1) #0 {
entry:
  ; View the first pointer slot as an i32 slot.
  %a.1 = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %a, i32 1
  %a.cast = bitcast i8 addrspace(3)* addrspace(3)* %a to i32 addrspace(3)*

  store i32 %val0, i32 addrspace(3)* %a.cast
  store i8 addrspace(3)* %ptr1, i8 addrspace(3)* addrspace(3)* %a.1

  ret void
}

; Negative test: a 32-bit addrspace(3) pointer value stored at %a, then an i64
; stored at the next i64 slot, both into addrspace(1) memory. Expected output:
; the scalar pointer store and the scalar i64 store are both still present, in
; that order.
; CHECK-LABEL: @no_merge_store_ptr32_i64(
; CHECK: store i8 addrspace(3)*
; CHECK: store i64
define amdgpu_kernel void @no_merge_store_ptr32_i64(i64 addrspace(1)* nocapture %a, i8 addrspace(3)* %ptr0, i64 %val1) #0 {
entry:
  ; View the first i64 slot as a slot holding an addrspace(3) pointer.
  %a.cast = bitcast i64 addrspace(1)* %a to i8 addrspace(3)* addrspace(1)*
  %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1

  store i8 addrspace(3)* %ptr0, i8 addrspace(3)* addrspace(1)* %a.cast
  store i64 %val1, i64 addrspace(1)* %a.1

  ret void
}

; Negative test: an i64 stored at %a, then a 32-bit addrspace(3) pointer value
; stored at the next pointer slot, both into addrspace(1) memory. Expected
; output: the scalar i64 store and the scalar pointer store are both still
; present, in that order.
; CHECK-LABEL: @no_merge_store_i64_ptr32(
; CHECK: store i64
; CHECK: store i8 addrspace(3)*
define amdgpu_kernel void @no_merge_store_i64_ptr32(i8 addrspace(3)* addrspace(1)* nocapture %a, i64 %val0, i8 addrspace(3)* %ptr1) #0 {
entry:
  ; View the first pointer slot as an i64 slot.
  %a.1 = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(1)* %a, i64 1
  %a.cast = bitcast i8 addrspace(3)* addrspace(1)* %a to i64 addrspace(1)*

  store i64 %val0, i64 addrspace(1)* %a.cast
  store i8 addrspace(3)* %ptr1, i8 addrspace(3)* addrspace(1)* %a.1

  ret void
}

; Negative test: an i64 load at %a, then a 32-bit addrspace(3) pointer load from
; the next i64 slot, both from addrspace(1) memory. Expected output: the scalar
; i64 load and the scalar pointer load are both still present, in that order.
; CHECK-LABEL: @no_merge_load_i64_ptr32(
; CHECK: load i64,
; CHECK: load i8 addrspace(3)*,
define amdgpu_kernel void @no_merge_load_i64_ptr32(i64 addrspace(1)* nocapture %a) #0 {
entry:
  ; View the second i64 slot as a slot holding an addrspace(3) pointer.
  %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1
  %a.1.cast = bitcast i64 addrspace(1)* %a.1 to i8 addrspace(3)* addrspace(1)*

  %ld.0 = load i64, i64 addrspace(1)* %a
  %ld.1 = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(1)* %a.1.cast

  ret void
}

; Negative test: a 32-bit addrspace(3) pointer load at %a, then an i64 load from
; the next i64 slot, both from addrspace(1) memory. Expected output: the scalar
; pointer load and the scalar i64 load are both still present, in that order.
; CHECK-LABEL: @no_merge_load_ptr32_i64(
; CHECK: load i8 addrspace(3)*,
; CHECK: load i64,
define amdgpu_kernel void @no_merge_load_ptr32_i64(i64 addrspace(1)* nocapture %a) #0 {
entry:
  ; View the first i64 slot as a slot holding an addrspace(3) pointer.
  %a.cast = bitcast i64 addrspace(1)* %a to i8 addrspace(3)* addrspace(1)*
  %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1

  %ld.0 = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(1)* %a.cast
  %ld.1 = load i64, i64 addrspace(1)* %a.1

  ret void
}

; XXX - Despite the name, this is not merged: the expected output keeps both
; <2 x pointer> loads and both <2 x pointer> stores as separate accesses.
; NOTE(review): presumably the vectorizer skips accesses whose type is already
; a vector of pointers - confirm against the LoadStoreVectorizer sources.
; CHECK-LABEL: @merge_v2p1i8_v2p1i8(
; CHECK: load <2 x i8 addrspace(1)*>
; CHECK: load <2 x i8 addrspace(1)*>
; CHECK: store <2 x i8 addrspace(1)*>
; CHECK: store <2 x i8 addrspace(1)*>
define amdgpu_kernel void @merge_v2p1i8_v2p1i8(<2 x i8 addrspace(1)*> addrspace(1)* nocapture noalias %a, <2 x i8 addrspace(1)*> addrspace(1)* nocapture readonly noalias %b) #0 {
entry:
  ; Second <2 x pointer> slot of each buffer.
  %a.1 = getelementptr inbounds <2 x i8 addrspace(1)*>, <2 x i8 addrspace(1)*> addrspace(1)* %a, i64 1
  %b.1 = getelementptr inbounds <2 x i8 addrspace(1)*>, <2 x i8 addrspace(1)*> addrspace(1)* %b, i64 1

  %ld.c = load <2 x i8 addrspace(1)*>, <2 x i8 addrspace(1)*> addrspace(1)* %b, align 4
  %ld.c.idx.1 = load <2 x i8 addrspace(1)*>, <2 x i8 addrspace(1)*> addrspace(1)* %b.1, align 4

  store <2 x i8 addrspace(1)*> zeroinitializer, <2 x i8 addrspace(1)*> addrspace(1)* %a, align 4
  store <2 x i8 addrspace(1)*> zeroinitializer, <2 x i8 addrspace(1)*> addrspace(1)* %a.1, align 4
  ret void
}

; An addrspace(1) pointer load at %a followed by a double load from the next
; double slot. Expected output: a single <2 x i64> load; element 0 is converted
; to the pointer with inttoptr and element 1 is bitcast to double.
; CHECK-LABEL: @merge_load_ptr64_f64(
; CHECK: load <2 x i64>
; CHECK: [[ELT0:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 0
; CHECK: [[ELT0_INT:%[^ ]+]] = inttoptr i64 [[ELT0]] to i8 addrspace(1)*
; CHECK: [[ELT1_INT:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 1
; CHECK: bitcast i64 [[ELT1_INT]] to double
define amdgpu_kernel void @merge_load_ptr64_f64(double addrspace(1)* nocapture %a) #0 {
entry:
  ; View the first double slot as a slot holding an addrspace(1) pointer.
  %a.cast = bitcast double addrspace(1)* %a to i8 addrspace(1)* addrspace(1)*
  %a.1 = getelementptr inbounds double, double addrspace(1)* %a, i64 1

  %ld.0 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a.cast
  %ld.1 = load double, double addrspace(1)* %a.1

  ret void
}

; A double load at %a followed by an addrspace(1) pointer load from the next
; double slot. Expected output: a single <2 x i64> load; element 0 is bitcast to
; double and element 1 is converted to the pointer with inttoptr.
; CHECK-LABEL: @merge_load_f64_ptr64(
; CHECK: load <2 x i64>
; CHECK: [[ELT0:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 0
; CHECK: bitcast i64 [[ELT0]] to double
; CHECK: [[ELT1:%[^ ]+]] = extractelement <2 x i64> %{{[^ ]+}}, i32 1
; CHECK: inttoptr i64 [[ELT1]] to i8 addrspace(1)*
define amdgpu_kernel void @merge_load_f64_ptr64(double addrspace(1)* nocapture %a) #0 {
entry:
  ; View the second double slot as a slot holding an addrspace(1) pointer.
  %a.1 = getelementptr inbounds double, double addrspace(1)* %a, i64 1
  %a.1.cast = bitcast double addrspace(1)* %a.1 to i8 addrspace(1)* addrspace(1)*

  %ld.0 = load double, double addrspace(1)* %a
  %ld.1 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a.1.cast

  ret void
}

; An addrspace(1) pointer store at %a followed by a double store to the next
; double slot. Expected output: %ptr0 goes through ptrtoint into element 0 of a
; <2 x i64> built from poison, %val1 is bitcast to i64 into element 1, and a
; single <2 x i64> store is emitted.
; CHECK-LABEL: @merge_store_ptr64_f64(
; CHECK: [[ELT0_INT:%[^ ]+]] = ptrtoint i8 addrspace(1)* %ptr0 to i64
; CHECK: insertelement <2 x i64> poison, i64 [[ELT0_INT]], i32 0
; CHECK: [[ELT1_INT:%[^ ]+]] = bitcast double %val1 to i64
; CHECK: insertelement <2 x i64> %{{[^ ]+}}, i64 [[ELT1_INT]], i32 1
; CHECK: store <2 x i64>
define amdgpu_kernel void @merge_store_ptr64_f64(double addrspace(1)* nocapture %a, i8 addrspace(1)* %ptr0, double %val1) #0 {
entry:
  ; View the first double slot as a slot holding an addrspace(1) pointer.
  %a.cast = bitcast double addrspace(1)* %a to i8 addrspace(1)* addrspace(1)*
  %a.1 = getelementptr inbounds double, double addrspace(1)* %a, i64 1

  store i8 addrspace(1)* %ptr0, i8 addrspace(1)* addrspace(1)* %a.cast
  store double %val1, double addrspace(1)* %a.1

  ret void
}

; A double store at %a followed by an addrspace(1) pointer store to the next
; pointer slot. Expected output: %val0 is bitcast to i64 into element 0 of a
; <2 x i64> built from poison, %ptr1 goes through ptrtoint into element 1, and
; a single <2 x i64> store is emitted.
; CHECK-LABEL: @merge_store_f64_ptr64(
; CHECK: [[ELT0_INT:%[^ ]+]] = bitcast double %val0 to i64
; CHECK: insertelement <2 x i64> poison, i64 [[ELT0_INT]], i32 0
; CHECK: [[ELT1_INT:%[^ ]+]] = ptrtoint i8 addrspace(1)* %ptr1 to i64
; CHECK: insertelement <2 x i64> %{{[^ ]+}}, i64 [[ELT1_INT]], i32 1
; CHECK: store <2 x i64>
define amdgpu_kernel void @merge_store_f64_ptr64(i8 addrspace(1)* addrspace(1)* nocapture %a, double %val0, i8 addrspace(1)* %ptr1) #0 {
entry:
  ; View the first pointer slot as a double slot.
  %a.1 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a, i64 1
  %a.cast = bitcast i8 addrspace(1)* addrspace(1)* %a to double addrspace(1)*

  store double %val0, double addrspace(1)* %a.cast
  store i8 addrspace(1)* %ptr1, i8 addrspace(1)* addrspace(1)* %a.1

  ret void
}

; #0 is attached to every test kernel visible in this file; #1 is attached to
; the llvm.amdgcn.workitem.id.x declaration.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }