1// REQUIRES: amdgpu-registered-target
2// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
3
4#pragma OPENCL EXTENSION cl_khr_fp64 : enable
5#pragma OPENCL EXTENSION cl_khr_fp16 : enable
6
// Locally declared vector types used by the kernels below: for each element
// type (char, short, int, long, half, float, double) there is one
// ext_vector_type typedef per width 2, 3, 4, 8 and 16. The expected IR in this
// file refers to these as <N x iM> / <N x half|float|double>, so the element
// type and width of every typedef are part of what the test verifies.
7typedef char __attribute__((ext_vector_type(2))) char2;
8typedef char __attribute__((ext_vector_type(3))) char3;
9typedef char __attribute__((ext_vector_type(4))) char4;
10typedef char __attribute__((ext_vector_type(8))) char8;
11typedef char __attribute__((ext_vector_type(16))) char16;
12
13typedef short __attribute__((ext_vector_type(2))) short2;
14typedef short __attribute__((ext_vector_type(3))) short3;
15typedef short __attribute__((ext_vector_type(4))) short4;
16typedef short __attribute__((ext_vector_type(8))) short8;
17typedef short __attribute__((ext_vector_type(16))) short16;
18
19typedef int __attribute__((ext_vector_type(2))) int2;
20typedef int __attribute__((ext_vector_type(3))) int3;
21typedef int __attribute__((ext_vector_type(4))) int4;
22typedef int __attribute__((ext_vector_type(8))) int8;
23typedef int __attribute__((ext_vector_type(16))) int16;
24
25typedef long __attribute__((ext_vector_type(2))) long2;
26typedef long __attribute__((ext_vector_type(3))) long3;
27typedef long __attribute__((ext_vector_type(4))) long4;
28typedef long __attribute__((ext_vector_type(8))) long8;
29typedef long __attribute__((ext_vector_type(16))) long16;
30
// half and double vectors rely on the cl_khr_fp16 / cl_khr_fp64 pragmas
// enabled near the top of this file.
31typedef half __attribute__((ext_vector_type(2))) half2;
32typedef half __attribute__((ext_vector_type(3))) half3;
33typedef half __attribute__((ext_vector_type(4))) half4;
34typedef half __attribute__((ext_vector_type(8))) half8;
35typedef half __attribute__((ext_vector_type(16))) half16;
36
37typedef float __attribute__((ext_vector_type(2))) float2;
38typedef float __attribute__((ext_vector_type(3))) float3;
39typedef float __attribute__((ext_vector_type(4))) float4;
40typedef float __attribute__((ext_vector_type(8))) float8;
41typedef float __attribute__((ext_vector_type(16))) float16;
42
43typedef double __attribute__((ext_vector_type(2))) double2;
44typedef double __attribute__((ext_vector_type(3))) double3;
45typedef double __attribute__((ext_vector_type(4))) double4;
46typedef double __attribute__((ext_vector_type(8))) double8;
47typedef double __attribute__((ext_vector_type(16))) double16;
48
49// CHECK: @local_memory_alignment_global.lds_i8 = internal addrspace(3) global [4 x i8] undef, align 1
50// CHECK: @local_memory_alignment_global.lds_v2i8 = internal addrspace(3) global [4 x <2 x i8>] undef, align 2
51// CHECK: @local_memory_alignment_global.lds_v3i8 = internal addrspace(3) global [4 x <3 x i8>] undef, align 4
52// CHECK: @local_memory_alignment_global.lds_v4i8 = internal addrspace(3) global [4 x <4 x i8>] undef, align 4
53// CHECK: @local_memory_alignment_global.lds_v8i8 = internal addrspace(3) global [4 x <8 x i8>] undef, align 8
54// CHECK: @local_memory_alignment_global.lds_v16i8 = internal addrspace(3) global [4 x <16 x i8>] undef, align 16
55// CHECK: @local_memory_alignment_global.lds_i16 = internal addrspace(3) global [4 x i16] undef, align 2
56// CHECK: @local_memory_alignment_global.lds_v2i16 = internal addrspace(3) global [4 x <2 x i16>] undef, align 4
57// CHECK: @local_memory_alignment_global.lds_v3i16 = internal addrspace(3) global [4 x <3 x i16>] undef, align 8
58// CHECK: @local_memory_alignment_global.lds_v4i16 = internal addrspace(3) global [4 x <4 x i16>] undef, align 8
59// CHECK: @local_memory_alignment_global.lds_v8i16 = internal addrspace(3) global [4 x <8 x i16>] undef, align 16
60// CHECK: @local_memory_alignment_global.lds_v16i16 = internal addrspace(3) global [4 x <16 x i16>] undef, align 32
61// CHECK: @local_memory_alignment_global.lds_i32 = internal addrspace(3) global [4 x i32] undef, align 4
62// CHECK: @local_memory_alignment_global.lds_v2i32 = internal addrspace(3) global [4 x <2 x i32>] undef, align 8
63// CHECK: @local_memory_alignment_global.lds_v3i32 = internal addrspace(3) global [4 x <3 x i32>] undef, align 16
64// CHECK: @local_memory_alignment_global.lds_v4i32 = internal addrspace(3) global [4 x <4 x i32>] undef, align 16
65// CHECK: @local_memory_alignment_global.lds_v8i32 = internal addrspace(3) global [4 x <8 x i32>] undef, align 32
66// CHECK: @local_memory_alignment_global.lds_v16i32 = internal addrspace(3) global [4 x <16 x i32>] undef, align 64
67// CHECK: @local_memory_alignment_global.lds_i64 = internal addrspace(3) global [4 x i64] undef, align 8
68// CHECK: @local_memory_alignment_global.lds_v2i64 = internal addrspace(3) global [4 x <2 x i64>] undef, align 16
69// CHECK: @local_memory_alignment_global.lds_v3i64 = internal addrspace(3) global [4 x <3 x i64>] undef, align 32
70// CHECK: @local_memory_alignment_global.lds_v4i64 = internal addrspace(3) global [4 x <4 x i64>] undef, align 32
71// CHECK: @local_memory_alignment_global.lds_v8i64 = internal addrspace(3) global [4 x <8 x i64>] undef, align 64
72// CHECK: @local_memory_alignment_global.lds_v16i64 = internal addrspace(3) global [4 x <16 x i64>] undef, align 128
73// CHECK: @local_memory_alignment_global.lds_f16 = internal addrspace(3) global [4 x half] undef, align 2
74// CHECK: @local_memory_alignment_global.lds_v2f16 = internal addrspace(3) global [4 x <2 x half>] undef, align 4
75// CHECK: @local_memory_alignment_global.lds_v3f16 = internal addrspace(3) global [4 x <3 x half>] undef, align 8
76// CHECK: @local_memory_alignment_global.lds_v4f16 = internal addrspace(3) global [4 x <4 x half>] undef, align 8
77// CHECK: @local_memory_alignment_global.lds_v8f16 = internal addrspace(3) global [4 x <8 x half>] undef, align 16
78// CHECK: @local_memory_alignment_global.lds_v16f16 = internal addrspace(3) global [4 x <16 x half>] undef, align 32
79// CHECK: @local_memory_alignment_global.lds_f32 = internal addrspace(3) global [4 x float] undef, align 4
80// CHECK: @local_memory_alignment_global.lds_v2f32 = internal addrspace(3) global [4 x <2 x float>] undef, align 8
81// CHECK: @local_memory_alignment_global.lds_v3f32 = internal addrspace(3) global [4 x <3 x float>] undef, align 16
82// CHECK: @local_memory_alignment_global.lds_v4f32 = internal addrspace(3) global [4 x <4 x float>] undef, align 16
83// CHECK: @local_memory_alignment_global.lds_v8f32 = internal addrspace(3) global [4 x <8 x float>] undef, align 32
84// CHECK: @local_memory_alignment_global.lds_v16f32 = internal addrspace(3) global [4 x <16 x float>] undef, align 64
85// CHECK: @local_memory_alignment_global.lds_f64 = internal addrspace(3) global [4 x double] undef, align 8
86// CHECK: @local_memory_alignment_global.lds_v2f64 = internal addrspace(3) global [4 x <2 x double>] undef, align 16
87// CHECK: @local_memory_alignment_global.lds_v3f64 = internal addrspace(3) global [4 x <3 x double>] undef, align 32
88// CHECK: @local_memory_alignment_global.lds_v4f64 = internal addrspace(3) global [4 x <4 x double>] undef, align 32
89// CHECK: @local_memory_alignment_global.lds_v8f64 = internal addrspace(3) global [4 x <8 x double>] undef, align 64
90// CHECK: @local_memory_alignment_global.lds_v16f64 = internal addrspace(3) global [4 x <16 x double>] undef, align 128
91
92
93// CHECK-LABEL: @local_memory_alignment_global(
94// CHECK: store volatile i8 0, i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @local_memory_alignment_global.lds_i8, i32 0, i32 0), align 1
95// CHECK: store volatile <2 x i8> zeroinitializer, <2 x i8> addrspace(3)* getelementptr inbounds ([4 x <2 x i8>], [4 x <2 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v2i8, i32 0, i32 0), align 2
96// CHECK: store volatile <4 x i8> <i8 0, i8 0, i8 0, i8 undef>, <4 x i8> addrspace(3)* bitcast ([4 x <3 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v3i8 to <4 x i8> addrspace(3)*), align 4
97// CHECK: store volatile <4 x i8> zeroinitializer, <4 x i8> addrspace(3)* getelementptr inbounds ([4 x <4 x i8>], [4 x <4 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v4i8, i32 0, i32 0), align 4
98// CHECK: store volatile <8 x i8> zeroinitializer, <8 x i8> addrspace(3)* getelementptr inbounds ([4 x <8 x i8>], [4 x <8 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v8i8, i32 0, i32 0), align 8
99// CHECK: store volatile <16 x i8> zeroinitializer, <16 x i8> addrspace(3)* getelementptr inbounds ([4 x <16 x i8>], [4 x <16 x i8>] addrspace(3)* @local_memory_alignment_global.lds_v16i8, i32 0, i32 0), align 16
100// CHECK: store volatile i16 0, i16 addrspace(3)* getelementptr inbounds ([4 x i16], [4 x i16] addrspace(3)* @local_memory_alignment_global.lds_i16, i32 0, i32 0), align 2
101// CHECK: store volatile <2 x i16> zeroinitializer, <2 x i16> addrspace(3)* getelementptr inbounds ([4 x <2 x i16>], [4 x <2 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v2i16, i32 0, i32 0), align 4
102// CHECK: store volatile <4 x i16> <i16 0, i16 0, i16 0, i16 undef>, <4 x i16> addrspace(3)* bitcast ([4 x <3 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v3i16 to <4 x i16> addrspace(3)*), align 8
103// CHECK: store volatile <4 x i16> zeroinitializer, <4 x i16> addrspace(3)* getelementptr inbounds ([4 x <4 x i16>], [4 x <4 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v4i16, i32 0, i32 0), align 8
104// CHECK: store volatile <8 x i16> zeroinitializer, <8 x i16> addrspace(3)* getelementptr inbounds ([4 x <8 x i16>], [4 x <8 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v8i16, i32 0, i32 0), align 16
105// CHECK: store volatile <16 x i16> zeroinitializer, <16 x i16> addrspace(3)* getelementptr inbounds ([4 x <16 x i16>], [4 x <16 x i16>] addrspace(3)* @local_memory_alignment_global.lds_v16i16, i32 0, i32 0), align 32
106// CHECK: store volatile i32 0, i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @local_memory_alignment_global.lds_i32, i32 0, i32 0), align 4
107// CHECK: store volatile <2 x i32> zeroinitializer, <2 x i32> addrspace(3)* getelementptr inbounds ([4 x <2 x i32>], [4 x <2 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v2i32, i32 0, i32 0), align 8
108// CHECK: store volatile <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, <4 x i32> addrspace(3)* bitcast ([4 x <3 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v3i32 to <4 x i32> addrspace(3)*), align 16
109// CHECK: store volatile <4 x i32> zeroinitializer, <4 x i32> addrspace(3)* getelementptr inbounds ([4 x <4 x i32>], [4 x <4 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v4i32, i32 0, i32 0), align 16
110// CHECK: store volatile <8 x i32> zeroinitializer, <8 x i32> addrspace(3)* getelementptr inbounds ([4 x <8 x i32>], [4 x <8 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v8i32, i32 0, i32 0), align 32
111// CHECK: store volatile <16 x i32> zeroinitializer, <16 x i32> addrspace(3)* getelementptr inbounds ([4 x <16 x i32>], [4 x <16 x i32>] addrspace(3)* @local_memory_alignment_global.lds_v16i32, i32 0, i32 0), align 64
112// CHECK: store volatile i64 0, i64 addrspace(3)* getelementptr inbounds ([4 x i64], [4 x i64] addrspace(3)* @local_memory_alignment_global.lds_i64, i32 0, i32 0), align 8
113// CHECK: store volatile <2 x i64> zeroinitializer, <2 x i64> addrspace(3)* getelementptr inbounds ([4 x <2 x i64>], [4 x <2 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v2i64, i32 0, i32 0), align 16
114// CHECK: store volatile <4 x i64> <i64 0, i64 0, i64 0, i64 undef>, <4 x i64> addrspace(3)* bitcast ([4 x <3 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v3i64 to <4 x i64> addrspace(3)*), align 32
115// CHECK: store volatile <4 x i64> zeroinitializer, <4 x i64> addrspace(3)* getelementptr inbounds ([4 x <4 x i64>], [4 x <4 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v4i64, i32 0, i32 0), align 32
116// CHECK: store volatile <8 x i64> zeroinitializer, <8 x i64> addrspace(3)* getelementptr inbounds ([4 x <8 x i64>], [4 x <8 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v8i64, i32 0, i32 0), align 64
117// CHECK: store volatile <16 x i64> zeroinitializer, <16 x i64> addrspace(3)* getelementptr inbounds ([4 x <16 x i64>], [4 x <16 x i64>] addrspace(3)* @local_memory_alignment_global.lds_v16i64, i32 0, i32 0), align 128
118// CHECK: store volatile half 0xH0000, half addrspace(3)* getelementptr inbounds ([4 x half], [4 x half] addrspace(3)* @local_memory_alignment_global.lds_f16, i32 0, i32 0), align 2
119// CHECK: store volatile <2 x half> zeroinitializer, <2 x half> addrspace(3)* getelementptr inbounds ([4 x <2 x half>], [4 x <2 x half>] addrspace(3)* @local_memory_alignment_global.lds_v2f16, i32 0, i32 0), align 4
120// CHECK: store volatile <4 x half> <half 0xH0000, half 0xH0000, half 0xH0000, half undef>, <4 x half> addrspace(3)* bitcast ([4 x <3 x half>] addrspace(3)* @local_memory_alignment_global.lds_v3f16 to <4 x half> addrspace(3)*), align 8
121// CHECK: store volatile <4 x half> zeroinitializer, <4 x half> addrspace(3)* getelementptr inbounds ([4 x <4 x half>], [4 x <4 x half>] addrspace(3)* @local_memory_alignment_global.lds_v4f16, i32 0, i32 0), align 8
122// CHECK: store volatile <8 x half> zeroinitializer, <8 x half> addrspace(3)* getelementptr inbounds ([4 x <8 x half>], [4 x <8 x half>] addrspace(3)* @local_memory_alignment_global.lds_v8f16, i32 0, i32 0), align 16
123// CHECK: store volatile <16 x half> zeroinitializer, <16 x half> addrspace(3)* getelementptr inbounds ([4 x <16 x half>], [4 x <16 x half>] addrspace(3)* @local_memory_alignment_global.lds_v16f16, i32 0, i32 0), align 32
124// CHECK: store volatile float 0.000000e+00, float addrspace(3)* getelementptr inbounds ([4 x float], [4 x float] addrspace(3)* @local_memory_alignment_global.lds_f32, i32 0, i32 0), align 4
125// CHECK: store volatile <2 x float> zeroinitializer, <2 x float> addrspace(3)* getelementptr inbounds ([4 x <2 x float>], [4 x <2 x float>] addrspace(3)* @local_memory_alignment_global.lds_v2f32, i32 0, i32 0), align 8
126// CHECK: store volatile <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, <4 x float> addrspace(3)* bitcast ([4 x <3 x float>] addrspace(3)* @local_memory_alignment_global.lds_v3f32 to <4 x float> addrspace(3)*), align 16
127// CHECK: store volatile <4 x float> zeroinitializer, <4 x float> addrspace(3)* getelementptr inbounds ([4 x <4 x float>], [4 x <4 x float>] addrspace(3)* @local_memory_alignment_global.lds_v4f32, i32 0, i32 0), align 16
128// CHECK: store volatile <8 x float> zeroinitializer, <8 x float> addrspace(3)* getelementptr inbounds ([4 x <8 x float>], [4 x <8 x float>] addrspace(3)* @local_memory_alignment_global.lds_v8f32, i32 0, i32 0), align 32
129// CHECK: store volatile <16 x float> zeroinitializer, <16 x float> addrspace(3)* getelementptr inbounds ([4 x <16 x float>], [4 x <16 x float>] addrspace(3)* @local_memory_alignment_global.lds_v16f32, i32 0, i32 0), align 64
130// CHECK: store volatile double 0.000000e+00, double addrspace(3)* getelementptr inbounds ([4 x double], [4 x double] addrspace(3)* @local_memory_alignment_global.lds_f64, i32 0, i32 0), align 8
131// CHECK: store volatile <2 x double> zeroinitializer, <2 x double> addrspace(3)* getelementptr inbounds ([4 x <2 x double>], [4 x <2 x double>] addrspace(3)* @local_memory_alignment_global.lds_v2f64, i32 0, i32 0), align 16
132// CHECK: store volatile <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double undef>, <4 x double> addrspace(3)* bitcast ([4 x <3 x double>] addrspace(3)* @local_memory_alignment_global.lds_v3f64 to <4 x double> addrspace(3)*), align 32
133// CHECK: store volatile <4 x double> zeroinitializer, <4 x double> addrspace(3)* getelementptr inbounds ([4 x <4 x double>], [4 x <4 x double>] addrspace(3)* @local_memory_alignment_global.lds_v4f64, i32 0, i32 0), align 32
134// CHECK: store volatile <8 x double> zeroinitializer, <8 x double> addrspace(3)* getelementptr inbounds ([4 x <8 x double>], [4 x <8 x double>] addrspace(3)* @local_memory_alignment_global.lds_v8f64, i32 0, i32 0), align 64
135// CHECK: store volatile <16 x double> zeroinitializer, <16 x double> addrspace(3)* getelementptr inbounds ([4 x <16 x double>], [4 x <16 x double>] addrspace(3)* @local_memory_alignment_global.lds_v16f64, i32 0, i32 0), align 128
// Declares one four-element `volatile local` array for every scalar and
// vector type in this file and writes 0 through each decayed array, i.e. to
// element 0. The FileCheck patterns preceding this kernel expect every array
// to be emitted as an internal addrspace(3) global named
// @local_memory_alignment_global.<variable>, with an alignment equal to the
// element size (3-element vectors are expected at the 4-element alignment).
// The variable names and the order of the stores are matched by those
// patterns, so neither may change without updating them.
136kernel void local_memory_alignment_global()
137{
  // One array per type under test; the names reappear verbatim in the
  // expected global symbol names.
138  volatile local char lds_i8[4];
139  volatile local char2 lds_v2i8[4];
140  volatile local char3 lds_v3i8[4];
141  volatile local char4 lds_v4i8[4];
142  volatile local char8 lds_v8i8[4];
143  volatile local char16 lds_v16i8[4];
144
145  volatile local short lds_i16[4];
146  volatile local short2 lds_v2i16[4];
147  volatile local short3 lds_v3i16[4];
148  volatile local short4 lds_v4i16[4];
149  volatile local short8 lds_v8i16[4];
150  volatile local short16 lds_v16i16[4];
151
152  volatile local int lds_i32[4];
153  volatile local int2 lds_v2i32[4];
154  volatile local int3 lds_v3i32[4];
155  volatile local int4 lds_v4i32[4];
156  volatile local int8 lds_v8i32[4];
157  volatile local int16 lds_v16i32[4];
158
159  volatile local long lds_i64[4];
160  volatile local long2 lds_v2i64[4];
161  volatile local long3 lds_v3i64[4];
162  volatile local long4 lds_v4i64[4];
163  volatile local long8 lds_v8i64[4];
164  volatile local long16 lds_v16i64[4];
165
166  volatile local half lds_f16[4];
167  volatile local half2 lds_v2f16[4];
168  volatile local half3 lds_v3f16[4];
169  volatile local half4 lds_v4f16[4];
170  volatile local half8 lds_v8f16[4];
171  volatile local half16 lds_v16f16[4];
172
173  volatile local float lds_f32[4];
174  volatile local float2 lds_v2f32[4];
175  volatile local float3 lds_v3f32[4];
176  volatile local float4 lds_v4f32[4];
177  volatile local float8 lds_v8f32[4];
178  volatile local float16 lds_v16f32[4];
179
180  volatile local double lds_f64[4];
181  volatile local double2 lds_v2f64[4];
182  volatile local double3 lds_v3f64[4];
183  volatile local double4 lds_v4f64[4];
184  volatile local double8 lds_v8f64[4];
185  volatile local double16 lds_v16f64[4];
186
  // One volatile store per array, in declaration order. For the 3-element
  // vectors the expected IR is a store of a 4-element vector whose last lane
  // is undef, through a bitcast of the array.
187  *lds_i8 = 0;
188  *lds_v2i8 = 0;
189  *lds_v3i8 = 0;
190  *lds_v4i8 = 0;
191  *lds_v8i8 = 0;
192  *lds_v16i8 = 0;
193
194  *lds_i16 = 0;
195  *lds_v2i16 = 0;
196  *lds_v3i16 = 0;
197  *lds_v4i16 = 0;
198  *lds_v8i16 = 0;
199  *lds_v16i16 = 0;
200
201  *lds_i32 = 0;
202  *lds_v2i32 = 0;
203  *lds_v3i32 = 0;
204  *lds_v4i32 = 0;
205  *lds_v8i32 = 0;
206  *lds_v16i32 = 0;
207
208  *lds_i64 = 0;
209  *lds_v2i64 = 0;
210  *lds_v3i64 = 0;
211  *lds_v4i64 = 0;
212  *lds_v8i64 = 0;
213  *lds_v16i64 = 0;
214
215  *lds_f16 = 0;
216  *lds_v2f16 = 0;
217  *lds_v3f16 = 0;
218  *lds_v4f16 = 0;
219  *lds_v8f16 = 0;
220  *lds_v16f16 = 0;
221
222  *lds_f32 = 0;
223  *lds_v2f32 = 0;
224  *lds_v3f32 = 0;
225  *lds_v4f32 = 0;
226  *lds_v8f32 = 0;
227  *lds_v16f32 = 0;
228
229  *lds_f64 = 0;
230  *lds_v2f64 = 0;
231  *lds_v3f64 = 0;
232  *lds_v4f64 = 0;
233  *lds_v8f64 = 0;
234  *lds_v16f64 = 0;
235}
236
// Performs the same sequence of zero stores as local_memory_alignment_global,
// but through `volatile local` pointer parameters (one per scalar/vector type)
// instead of kernel-scope arrays.
// NOTE(review): no FileCheck patterns for this kernel are visible in this
// file, so the IR generated for it appears to be unverified. Confirm whether
// that is intentional or whether expectations for the stores are missing.
237kernel void local_memory_alignment_arg(
238  volatile local char* lds_i8,
239  volatile local char2* lds_v2i8,
240  volatile local char3* lds_v3i8,
241  volatile local char4* lds_v4i8,
242  volatile local char8* lds_v8i8,
243  volatile local char16* lds_v16i8,
244
245  volatile local short* lds_i16,
246  volatile local short2* lds_v2i16,
247  volatile local short3* lds_v3i16,
248  volatile local short4* lds_v4i16,
249  volatile local short8* lds_v8i16,
250  volatile local short16* lds_v16i16,
251
252  volatile local int* lds_i32,
253  volatile local int2* lds_v2i32,
254  volatile local int3* lds_v3i32,
255  volatile local int4* lds_v4i32,
256  volatile local int8* lds_v8i32,
257  volatile local int16* lds_v16i32,
258
259  volatile local long* lds_i64,
260  volatile local long2* lds_v2i64,
261  volatile local long3* lds_v3i64,
262  volatile local long4* lds_v4i64,
263  volatile local long8* lds_v8i64,
264  volatile local long16* lds_v16i64,
265
266  volatile local half* lds_f16,
267  volatile local half2* lds_v2f16,
268  volatile local half3* lds_v3f16,
269  volatile local half4* lds_v4f16,
270  volatile local half8* lds_v8f16,
271  volatile local half16* lds_v16f16,
272
273  volatile local float* lds_f32,
274  volatile local float2* lds_v2f32,
275  volatile local float3* lds_v3f32,
276  volatile local float4* lds_v4f32,
277  volatile local float8* lds_v8f32,
278  volatile local float16* lds_v16f32,
279
280  volatile local double* lds_f64,
281  volatile local double2* lds_v2f64,
282  volatile local double3* lds_v3f64,
283  volatile local double4* lds_v4f64,
284  volatile local double8* lds_v8f64,
285  volatile local double16* lds_v16f64)
286{
  // One volatile store of 0 through each pointer, in parameter order.
287  *lds_i8 = 0;
288  *lds_v2i8 = 0;
289  *lds_v3i8 = 0;
290  *lds_v4i8 = 0;
291  *lds_v8i8 = 0;
292  *lds_v16i8 = 0;
293
294  *lds_i16 = 0;
295  *lds_v2i16 = 0;
296  *lds_v3i16 = 0;
297  *lds_v4i16 = 0;
298  *lds_v8i16 = 0;
299  *lds_v16i16 = 0;
300
301  *lds_i32 = 0;
302  *lds_v2i32 = 0;
303  *lds_v3i32 = 0;
304  *lds_v4i32 = 0;
305  *lds_v8i32 = 0;
306  *lds_v16i32 = 0;
307
308  *lds_i64 = 0;
309  *lds_v2i64 = 0;
310  *lds_v3i64 = 0;
311  *lds_v4i64 = 0;
312  *lds_v8i64 = 0;
313  *lds_v16i64 = 0;
314
315  *lds_f16 = 0;
316  *lds_v2f16 = 0;
317  *lds_v3f16 = 0;
318  *lds_v4f16 = 0;
319  *lds_v8f16 = 0;
320  *lds_v16f16 = 0;
321
322  *lds_f32 = 0;
323  *lds_v2f32 = 0;
324  *lds_v3f32 = 0;
325  *lds_v4f32 = 0;
326  *lds_v8f32 = 0;
327  *lds_v16f32 = 0;
328
329  *lds_f64 = 0;
330  *lds_v2f64 = 0;
331  *lds_v3f64 = 0;
332  *lds_v4f64 = 0;
333  *lds_v8f64 = 0;
334  *lds_v16f64 = 0;
335}
336
337// CHECK-LABEL: @private_memory_alignment_alloca(
338// CHECK: %private_i8 = alloca [4 x i8], align 1
339// CHECK: %private_v2i8 = alloca [4 x <2 x i8>], align 2
340// CHECK: %private_v3i8 = alloca [4 x <3 x i8>], align 4
341// CHECK: %private_v4i8 = alloca [4 x <4 x i8>], align 4
342// CHECK: %private_v8i8 = alloca [4 x <8 x i8>], align 8
343// CHECK: %private_v16i8 = alloca [4 x <16 x i8>], align 16
344// CHECK: %private_i16 = alloca [4 x i16], align 2
345// CHECK: %private_v2i16 = alloca [4 x <2 x i16>], align 4
346// CHECK: %private_v3i16 = alloca [4 x <3 x i16>], align 8
347// CHECK: %private_v4i16 = alloca [4 x <4 x i16>], align 8
348// CHECK: %private_v8i16 = alloca [4 x <8 x i16>], align 16
349// CHECK: %private_v16i16 = alloca [4 x <16 x i16>], align 32
350// CHECK: %private_i32 = alloca [4 x i32], align 4
351// CHECK: %private_v2i32 = alloca [4 x <2 x i32>], align 8
352// CHECK: %private_v3i32 = alloca [4 x <3 x i32>], align 16
353// CHECK: %private_v4i32 = alloca [4 x <4 x i32>], align 16
354// CHECK: %private_v8i32 = alloca [4 x <8 x i32>], align 32
355// CHECK: %private_v16i32 = alloca [4 x <16 x i32>], align 64
356// CHECK: %private_i64 = alloca [4 x i64], align 8
357// CHECK: %private_v2i64 = alloca [4 x <2 x i64>], align 16
358// CHECK: %private_v3i64 = alloca [4 x <3 x i64>], align 32
359// CHECK: %private_v4i64 = alloca [4 x <4 x i64>], align 32
360// CHECK: %private_v8i64 = alloca [4 x <8 x i64>], align 64
361// CHECK: %private_v16i64 = alloca [4 x <16 x i64>], align 128
362// CHECK: %private_f16 = alloca [4 x half], align 2
363// CHECK: %private_v2f16 = alloca [4 x <2 x half>], align 4
364// CHECK: %private_v3f16 = alloca [4 x <3 x half>], align 8
365// CHECK: %private_v4f16 = alloca [4 x <4 x half>], align 8
366// CHECK: %private_v8f16 = alloca [4 x <8 x half>], align 16
367// CHECK: %private_v16f16 = alloca [4 x <16 x half>], align 32
368// CHECK: %private_f32 = alloca [4 x float], align 4
369// CHECK: %private_v2f32 = alloca [4 x <2 x float>], align 8
370// CHECK: %private_v3f32 = alloca [4 x <3 x float>], align 16
371// CHECK: %private_v4f32 = alloca [4 x <4 x float>], align 16
372// CHECK: %private_v8f32 = alloca [4 x <8 x float>], align 32
373// CHECK: %private_v16f32 = alloca [4 x <16 x float>], align 64
374// CHECK: %private_f64 = alloca [4 x double], align 8
375// CHECK: %private_v2f64 = alloca [4 x <2 x double>], align 16
376// CHECK: %private_v3f64 = alloca [4 x <3 x double>], align 32
377// CHECK: %private_v4f64 = alloca [4 x <4 x double>], align 32
378// CHECK: %private_v8f64 = alloca [4 x <8 x double>], align 64
379// CHECK: %private_v16f64 = alloca [4 x <16 x double>], align 128
380
381// CHECK: store volatile i8 0, i8* %arraydecay, align 1
382// CHECK: store volatile <2 x i8> zeroinitializer, <2 x i8>* %arraydecay{{[0-9]+}}, align 2
383// CHECK: store volatile <4 x i8> <i8 0, i8 0, i8 0, i8 undef>, <4 x i8>* %storetmp, align 4
384// CHECK: store volatile <4 x i8> zeroinitializer, <4 x i8>* %arraydecay{{[0-9]+}}, align 4
385// CHECK: store volatile <8 x i8> zeroinitializer, <8 x i8>* %arraydecay{{[0-9]+}}, align 8
386// CHECK: store volatile <16 x i8> zeroinitializer, <16 x i8>* %arraydecay{{[0-9]+}}, align 16
387// CHECK: store volatile i16 0, i16* %arraydecay{{[0-9]+}}, align 2
388// CHECK: store volatile <2 x i16> zeroinitializer, <2 x i16>* %arraydecay{{[0-9]+}}, align 4
389// CHECK: store volatile <4 x i16> <i16 0, i16 0, i16 0, i16 undef>, <4 x i16>* %storetmp{{[0-9]+}}, align 8
390// CHECK: store volatile <4 x i16> zeroinitializer, <4 x i16>* %arraydecay{{[0-9]+}}, align 8
391// CHECK: store volatile <8 x i16> zeroinitializer, <8 x i16>* %arraydecay{{[0-9]+}}, align 16
392// CHECK: store volatile <16 x i16> zeroinitializer, <16 x i16>* %arraydecay{{[0-9]+}}, align 32
393// CHECK: store volatile i32 0, i32* %arraydecay{{[0-9]+}}, align 4
394// CHECK: store volatile <2 x i32> zeroinitializer, <2 x i32>* %arraydecay{{[0-9]+}}, align 8
395// CHECK: store volatile <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, <4 x i32>* %storetmp{{[0-9]+}}, align 16
396// CHECK: store volatile <4 x i32> zeroinitializer, <4 x i32>* %arraydecay{{[0-9]+}}, align 16
397// CHECK: store volatile <8 x i32> zeroinitializer, <8 x i32>* %arraydecay{{[0-9]+}}, align 32
398// CHECK: store volatile <16 x i32> zeroinitializer, <16 x i32>* %arraydecay{{[0-9]+}}, align 64
399// CHECK: store volatile i64 0, i64* %arraydecay{{[0-9]+}}, align 8
400// CHECK: store volatile <2 x i64> zeroinitializer, <2 x i64>* %arraydecay{{[0-9]+}}, align 16
401// CHECK: store volatile <4 x i64> <i64 0, i64 0, i64 0, i64 undef>, <4 x i64>* %storetmp{{[0-9]+}}, align 32
402// CHECK: store volatile <4 x i64> zeroinitializer, <4 x i64>* %arraydecay{{[0-9]+}}, align 32
403// CHECK: store volatile <8 x i64> zeroinitializer, <8 x i64>* %arraydecay{{[0-9]+}}, align 64
404// CHECK: store volatile <16 x i64> zeroinitializer, <16 x i64>* %arraydecay{{[0-9]+}}, align 128
405// CHECK: store volatile half 0xH0000, half* %arraydecay{{[0-9]+}}, align 2
406// CHECK: store volatile <2 x half> zeroinitializer, <2 x half>* %arraydecay{{[0-9]+}}, align 4
407// CHECK: store volatile <4 x half> <half 0xH0000, half 0xH0000, half 0xH0000, half undef>, <4 x half>* %storetmp{{[0-9]+}}, align 8
408// CHECK: store volatile <4 x half> zeroinitializer, <4 x half>* %arraydecay{{[0-9]+}}, align 8
409// CHECK: store volatile <8 x half> zeroinitializer, <8 x half>* %arraydecay{{[0-9]+}}, align 16
410// CHECK: store volatile <16 x half> zeroinitializer, <16 x half>* %arraydecay{{[0-9]+}}, align 32
411// CHECK: store volatile float 0.000000e+00, float* %arraydecay{{[0-9]+}}, align 4
412// CHECK: store volatile <2 x float> zeroinitializer, <2 x float>* %arraydecay{{[0-9]+}}, align 8
413// CHECK: store volatile <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef>, <4 x float>* %storetmp{{[0-9]+}}, align 16
414// CHECK: store volatile <4 x float> zeroinitializer, <4 x float>* %arraydecay{{[0-9]+}}, align 16
415// CHECK: store volatile <8 x float> zeroinitializer, <8 x float>* %arraydecay{{[0-9]+}}, align 32
416// CHECK: store volatile <16 x float> zeroinitializer, <16 x float>* %arraydecay{{[0-9]+}}, align 64
417// CHECK: store volatile double 0.000000e+00, double* %arraydecay{{[0-9]+}}, align 8
418// CHECK: store volatile <2 x double> zeroinitializer, <2 x double>* %arraydecay{{[0-9]+}}, align 16
419// CHECK: store volatile <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double undef>, <4 x double>* %storetmp{{[0-9]+}}, align 32
420// CHECK: store volatile <4 x double> zeroinitializer, <4 x double>* %arraydecay{{[0-9]+}}, align 32
421// CHECK: store volatile <8 x double> zeroinitializer, <8 x double>* %arraydecay{{[0-9]+}}, align 64
422// CHECK: store volatile <16 x double> zeroinitializer, <16 x double>* %arraydecay{{[0-9]+}}, align 128
// Private-address-space counterpart of local_memory_alignment_global: declares
// one four-element `volatile private` array per scalar/vector type and writes
// 0 through each decayed array (element 0). The FileCheck patterns preceding
// this kernel expect one alloca per array, named after the variable
// (%private_i8, %private_v2i8, ...), followed by one volatile store per array,
// each with an alignment equal to the element size (3-element vectors at the
// 4-element alignment). Variable names and store order are matched by those
// patterns, so neither may change without updating them.
423kernel void private_memory_alignment_alloca()
424{
  // One array per type under test; the names reappear verbatim as the
  // expected alloca names.
425  volatile private char private_i8[4];
426  volatile private char2 private_v2i8[4];
427  volatile private char3 private_v3i8[4];
428  volatile private char4 private_v4i8[4];
429  volatile private char8 private_v8i8[4];
430  volatile private char16 private_v16i8[4];
431
432  volatile private short private_i16[4];
433  volatile private short2 private_v2i16[4];
434  volatile private short3 private_v3i16[4];
435  volatile private short4 private_v4i16[4];
436  volatile private short8 private_v8i16[4];
437  volatile private short16 private_v16i16[4];
438
439  volatile private int private_i32[4];
440  volatile private int2 private_v2i32[4];
441  volatile private int3 private_v3i32[4];
442  volatile private int4 private_v4i32[4];
443  volatile private int8 private_v8i32[4];
444  volatile private int16 private_v16i32[4];
445
446  volatile private long private_i64[4];
447  volatile private long2 private_v2i64[4];
448  volatile private long3 private_v3i64[4];
449  volatile private long4 private_v4i64[4];
450  volatile private long8 private_v8i64[4];
451  volatile private long16 private_v16i64[4];
452
453  volatile private half private_f16[4];
454  volatile private half2 private_v2f16[4];
455  volatile private half3 private_v3f16[4];
456  volatile private half4 private_v4f16[4];
457  volatile private half8 private_v8f16[4];
458  volatile private half16 private_v16f16[4];
459
460  volatile private float private_f32[4];
461  volatile private float2 private_v2f32[4];
462  volatile private float3 private_v3f32[4];
463  volatile private float4 private_v4f32[4];
464  volatile private float8 private_v8f32[4];
465  volatile private float16 private_v16f32[4];
466
467  volatile private double private_f64[4];
468  volatile private double2 private_v2f64[4];
469  volatile private double3 private_v3f64[4];
470  volatile private double4 private_v4f64[4];
471  volatile private double8 private_v8f64[4];
472  volatile private double16 private_v16f64[4];
473
  // One volatile store per array, in declaration order. For the 3-element
  // vectors the expected IR is a store of a 4-element vector whose last lane
  // is undef, through a %storetmp value.
474  *private_i8 = 0;
475  *private_v2i8 = 0;
476  *private_v3i8 = 0;
477  *private_v4i8 = 0;
478  *private_v8i8 = 0;
479  *private_v16i8 = 0;
480
481  *private_i16 = 0;
482  *private_v2i16 = 0;
483  *private_v3i16 = 0;
484  *private_v4i16 = 0;
485  *private_v8i16 = 0;
486  *private_v16i16 = 0;
487
488  *private_i32 = 0;
489  *private_v2i32 = 0;
490  *private_v3i32 = 0;
491  *private_v4i32 = 0;
492  *private_v8i32 = 0;
493  *private_v16i32 = 0;
494
495  *private_i64 = 0;
496  *private_v2i64 = 0;
497  *private_v3i64 = 0;
498  *private_v4i64 = 0;
499  *private_v8i64 = 0;
500  *private_v16i64 = 0;
501
502  *private_f16 = 0;
503  *private_v2f16 = 0;
504  *private_v3f16 = 0;
505  *private_v4f16 = 0;
506  *private_v8f16 = 0;
507  *private_v16f16 = 0;
508
509  *private_f32 = 0;
510  *private_v2f32 = 0;
511  *private_v3f32 = 0;
512  *private_v4f32 = 0;
513  *private_v8f32 = 0;
514  *private_v16f32 = 0;
515
516  *private_f64 = 0;
517  *private_v2f64 = 0;
518  *private_v3f64 = 0;
519  *private_v4f64 = 0;
520  *private_v8f64 = 0;
521  *private_v16f64 = 0;
522}
523