1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
2 // REQUIRES: powerpc-registered-target
3 // REQUIRES: nvptx-registered-target
4 
5 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix=CHECK1
6 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
7 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK2
8 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -emit-pch -o %t
9 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -o - | FileCheck %s --check-prefix=CHECK3
10 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -o - | FileCheck %s --check-prefix=CHECK4
11 
12 // expected-no-diagnostics
13 #ifndef HEADER
14 #define HEADER
15 
template <typename T>
int foo(const T &t) {          // Invokes an arbitrary callable inside a target-parallel region; exercises mapping of a lambda passed by const reference. NOTE(review): line numbers are encoded in the autogenerated CHECK offload-region names — keep line count stable.
  #pragma omp target parallel
  t();                         // lambda call on the device; its captures must be mapped implicitly
  return 0;
}
22 
struct S {
  int a = 15;
  int foo() {
    auto &&L = [&]() { return a; };  // [&] in a member function captures `this`; tests implicit mapping of `this` with the lambda
#pragma omp target
    L();                             // plain target region calling the member-capturing lambda
#pragma omp target parallel
    L();                             // same lambda under target parallel (SPMD codegen path on the device)
    return a + ::foo(L);             // also routes the member lambda through the ::foo template above
  }
} s;                                 // global instance so main() can call s.foo()
34 
int main(int argc, char **argv) {
  int &b = argc;                     // lvalue-reference capture
  int &&c = 1;                       // rvalue-reference capture (materialized temporary)
  int *d = &argc;                    // pointer capture
  int a;                             // plain variable, mapped explicitly below
  auto &&L = [&]() { return argc + b + c + reinterpret_cast<long int>(d) + a; };
#pragma omp target firstprivate(argc) map(to : a)
  L();                               // lambda with mixed explicit/implicit mapping of its captures — region name is _main_l41, so this pragma must stay on line 41
#pragma omp target parallel
  L();                               // same lambda, all captures implicitly mapped — region name _main_l43
  return argc + s.foo();
}
47 
48 #endif // HEADER
49 // CHECK1-LABEL: define {{[^@]+}}@main
50 // CHECK1-SAME: (i32 signext [[ARGC:%.*]], i8** [[ARGV:%.*]]) #[[ATTR0:[0-9]+]] {
51 // CHECK1-NEXT:  entry:
52 // CHECK1-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
53 // CHECK1-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
54 // CHECK1-NEXT:    [[ARGV_ADDR:%.*]] = alloca i8**, align 8
55 // CHECK1-NEXT:    [[B:%.*]] = alloca i32*, align 8
56 // CHECK1-NEXT:    [[C:%.*]] = alloca i32*, align 8
57 // CHECK1-NEXT:    [[REF_TMP:%.*]] = alloca i32, align 4
58 // CHECK1-NEXT:    [[D:%.*]] = alloca i32*, align 8
59 // CHECK1-NEXT:    [[A:%.*]] = alloca i32, align 4
60 // CHECK1-NEXT:    [[L:%.*]] = alloca %class.anon*, align 8
61 // CHECK1-NEXT:    [[REF_TMP1:%.*]] = alloca [[CLASS_ANON:%.*]], align 8
62 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32*, align 8
63 // CHECK1-NEXT:    [[_TMP2:%.*]] = alloca i32*, align 8
64 // CHECK1-NEXT:    [[_TMP3:%.*]] = alloca %class.anon*, align 8
65 // CHECK1-NEXT:    [[ARGC_CASTED:%.*]] = alloca i64, align 8
66 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [11 x i8*], align 8
67 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [11 x i8*], align 8
68 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [11 x i8*], align 8
69 // CHECK1-NEXT:    [[_TMP4:%.*]] = alloca i32*, align 8
70 // CHECK1-NEXT:    [[_TMP5:%.*]] = alloca i32*, align 8
71 // CHECK1-NEXT:    [[_TMP6:%.*]] = alloca %class.anon*, align 8
72 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS7:%.*]] = alloca [11 x i8*], align 8
73 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS8:%.*]] = alloca [11 x i8*], align 8
74 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS9:%.*]] = alloca [11 x i8*], align 8
75 // CHECK1-NEXT:    store i32 0, i32* [[RETVAL]], align 4
76 // CHECK1-NEXT:    store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4
77 // CHECK1-NEXT:    store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8
78 // CHECK1-NEXT:    store i32* [[ARGC_ADDR]], i32** [[B]], align 8
79 // CHECK1-NEXT:    store i32 1, i32* [[REF_TMP]], align 4
80 // CHECK1-NEXT:    store i32* [[REF_TMP]], i32** [[C]], align 8
81 // CHECK1-NEXT:    store i32* [[ARGC_ADDR]], i32** [[D]], align 8
82 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP1]], i32 0, i32 0
83 // CHECK1-NEXT:    store i32* [[ARGC_ADDR]], i32** [[TMP0]], align 8
84 // CHECK1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP1]], i32 0, i32 1
85 // CHECK1-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[B]], align 8
86 // CHECK1-NEXT:    store i32* [[TMP2]], i32** [[TMP1]], align 8
87 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP1]], i32 0, i32 2
88 // CHECK1-NEXT:    [[TMP4:%.*]] = load i32*, i32** [[C]], align 8
89 // CHECK1-NEXT:    store i32* [[TMP4]], i32** [[TMP3]], align 8
90 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP1]], i32 0, i32 3
91 // CHECK1-NEXT:    store i32** [[D]], i32*** [[TMP5]], align 8
92 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[REF_TMP1]], i32 0, i32 4
93 // CHECK1-NEXT:    store i32* [[A]], i32** [[TMP6]], align 8
94 // CHECK1-NEXT:    store %class.anon* [[REF_TMP1]], %class.anon** [[L]], align 8
95 // CHECK1-NEXT:    [[TMP7:%.*]] = load i32*, i32** [[B]], align 8
96 // CHECK1-NEXT:    store i32* [[TMP7]], i32** [[TMP]], align 8
97 // CHECK1-NEXT:    [[TMP8:%.*]] = load i32*, i32** [[C]], align 8
98 // CHECK1-NEXT:    store i32* [[TMP8]], i32** [[_TMP2]], align 8
99 // CHECK1-NEXT:    [[TMP9:%.*]] = load %class.anon*, %class.anon** [[L]], align 8
100 // CHECK1-NEXT:    store %class.anon* [[TMP9]], %class.anon** [[_TMP3]], align 8
101 // CHECK1-NEXT:    [[TMP10:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
102 // CHECK1-NEXT:    [[CONV:%.*]] = bitcast i64* [[ARGC_CASTED]] to i32*
103 // CHECK1-NEXT:    store i32 [[TMP10]], i32* [[CONV]], align 4
104 // CHECK1-NEXT:    [[TMP11:%.*]] = load i64, i64* [[ARGC_CASTED]], align 8
105 // CHECK1-NEXT:    [[TMP12:%.*]] = load i32*, i32** [[TMP]], align 8
106 // CHECK1-NEXT:    [[TMP13:%.*]] = load i32*, i32** [[_TMP2]], align 8
107 // CHECK1-NEXT:    [[TMP14:%.*]] = load i32*, i32** [[D]], align 8
108 // CHECK1-NEXT:    [[TMP15:%.*]] = load %class.anon*, %class.anon** [[_TMP3]], align 8
109 // CHECK1-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP15]], i32 0, i32 0
110 // CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP15]], i32 0, i32 0
111 // CHECK1-NEXT:    [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8
112 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP15]], i32 0, i32 1
113 // CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP15]], i32 0, i32 1
114 // CHECK1-NEXT:    [[TMP21:%.*]] = load i32*, i32** [[TMP20]], align 8
115 // CHECK1-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP15]], i32 0, i32 2
116 // CHECK1-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP15]], i32 0, i32 2
117 // CHECK1-NEXT:    [[TMP24:%.*]] = load i32*, i32** [[TMP23]], align 8
118 // CHECK1-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP15]], i32 0, i32 3
119 // CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP15]], i32 0, i32 3
120 // CHECK1-NEXT:    [[TMP27:%.*]] = load i32**, i32*** [[TMP26]], align 8
121 // CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP15]], i32 0, i32 4
122 // CHECK1-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP15]], i32 0, i32 4
123 // CHECK1-NEXT:    [[TMP30:%.*]] = load i32*, i32** [[TMP29]], align 8
124 // CHECK1-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
125 // CHECK1-NEXT:    [[TMP32:%.*]] = bitcast i8** [[TMP31]] to i64*
126 // CHECK1-NEXT:    store i64 [[TMP11]], i64* [[TMP32]], align 8
127 // CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
128 // CHECK1-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to i64*
129 // CHECK1-NEXT:    store i64 [[TMP11]], i64* [[TMP34]], align 8
130 // CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
131 // CHECK1-NEXT:    store i8* null, i8** [[TMP35]], align 8
132 // CHECK1-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
133 // CHECK1-NEXT:    [[TMP37:%.*]] = bitcast i8** [[TMP36]] to i32**
134 // CHECK1-NEXT:    store i32* [[TMP12]], i32** [[TMP37]], align 8
135 // CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
136 // CHECK1-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to i32**
137 // CHECK1-NEXT:    store i32* [[TMP12]], i32** [[TMP39]], align 8
138 // CHECK1-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
139 // CHECK1-NEXT:    store i8* null, i8** [[TMP40]], align 8
140 // CHECK1-NEXT:    [[TMP41:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
141 // CHECK1-NEXT:    [[TMP42:%.*]] = bitcast i8** [[TMP41]] to i32**
142 // CHECK1-NEXT:    store i32* [[TMP13]], i32** [[TMP42]], align 8
143 // CHECK1-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
144 // CHECK1-NEXT:    [[TMP44:%.*]] = bitcast i8** [[TMP43]] to i32**
145 // CHECK1-NEXT:    store i32* [[TMP13]], i32** [[TMP44]], align 8
146 // CHECK1-NEXT:    [[TMP45:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
147 // CHECK1-NEXT:    store i8* null, i8** [[TMP45]], align 8
148 // CHECK1-NEXT:    [[TMP46:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 3
149 // CHECK1-NEXT:    [[TMP47:%.*]] = bitcast i8** [[TMP46]] to i32**
150 // CHECK1-NEXT:    store i32* [[TMP14]], i32** [[TMP47]], align 8
151 // CHECK1-NEXT:    [[TMP48:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 3
152 // CHECK1-NEXT:    [[TMP49:%.*]] = bitcast i8** [[TMP48]] to i32**
153 // CHECK1-NEXT:    store i32* [[TMP14]], i32** [[TMP49]], align 8
154 // CHECK1-NEXT:    [[TMP50:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 3
155 // CHECK1-NEXT:    store i8* null, i8** [[TMP50]], align 8
156 // CHECK1-NEXT:    [[TMP51:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 4
157 // CHECK1-NEXT:    [[TMP52:%.*]] = bitcast i8** [[TMP51]] to i32**
158 // CHECK1-NEXT:    store i32* [[A]], i32** [[TMP52]], align 8
159 // CHECK1-NEXT:    [[TMP53:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 4
160 // CHECK1-NEXT:    [[TMP54:%.*]] = bitcast i8** [[TMP53]] to i32**
161 // CHECK1-NEXT:    store i32* [[A]], i32** [[TMP54]], align 8
162 // CHECK1-NEXT:    [[TMP55:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 4
163 // CHECK1-NEXT:    store i8* null, i8** [[TMP55]], align 8
164 // CHECK1-NEXT:    [[TMP56:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 5
165 // CHECK1-NEXT:    [[TMP57:%.*]] = bitcast i8** [[TMP56]] to %class.anon**
166 // CHECK1-NEXT:    store %class.anon* [[TMP15]], %class.anon** [[TMP57]], align 8
167 // CHECK1-NEXT:    [[TMP58:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 5
168 // CHECK1-NEXT:    [[TMP59:%.*]] = bitcast i8** [[TMP58]] to %class.anon**
169 // CHECK1-NEXT:    store %class.anon* [[TMP15]], %class.anon** [[TMP59]], align 8
170 // CHECK1-NEXT:    [[TMP60:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 5
171 // CHECK1-NEXT:    store i8* null, i8** [[TMP60]], align 8
172 // CHECK1-NEXT:    [[TMP61:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 6
173 // CHECK1-NEXT:    [[TMP62:%.*]] = bitcast i8** [[TMP61]] to i32***
174 // CHECK1-NEXT:    store i32** [[TMP16]], i32*** [[TMP62]], align 8
175 // CHECK1-NEXT:    [[TMP63:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 6
176 // CHECK1-NEXT:    [[TMP64:%.*]] = bitcast i8** [[TMP63]] to i32**
177 // CHECK1-NEXT:    store i32* [[TMP18]], i32** [[TMP64]], align 8
178 // CHECK1-NEXT:    [[TMP65:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 6
179 // CHECK1-NEXT:    store i8* null, i8** [[TMP65]], align 8
180 // CHECK1-NEXT:    [[TMP66:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 7
181 // CHECK1-NEXT:    [[TMP67:%.*]] = bitcast i8** [[TMP66]] to i32***
182 // CHECK1-NEXT:    store i32** [[TMP19]], i32*** [[TMP67]], align 8
183 // CHECK1-NEXT:    [[TMP68:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 7
184 // CHECK1-NEXT:    [[TMP69:%.*]] = bitcast i8** [[TMP68]] to i32**
185 // CHECK1-NEXT:    store i32* [[TMP21]], i32** [[TMP69]], align 8
186 // CHECK1-NEXT:    [[TMP70:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 7
187 // CHECK1-NEXT:    store i8* null, i8** [[TMP70]], align 8
188 // CHECK1-NEXT:    [[TMP71:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 8
189 // CHECK1-NEXT:    [[TMP72:%.*]] = bitcast i8** [[TMP71]] to i32***
190 // CHECK1-NEXT:    store i32** [[TMP22]], i32*** [[TMP72]], align 8
191 // CHECK1-NEXT:    [[TMP73:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 8
192 // CHECK1-NEXT:    [[TMP74:%.*]] = bitcast i8** [[TMP73]] to i32**
193 // CHECK1-NEXT:    store i32* [[TMP24]], i32** [[TMP74]], align 8
194 // CHECK1-NEXT:    [[TMP75:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 8
195 // CHECK1-NEXT:    store i8* null, i8** [[TMP75]], align 8
196 // CHECK1-NEXT:    [[TMP76:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 9
197 // CHECK1-NEXT:    [[TMP77:%.*]] = bitcast i8** [[TMP76]] to i32****
198 // CHECK1-NEXT:    store i32*** [[TMP25]], i32**** [[TMP77]], align 8
199 // CHECK1-NEXT:    [[TMP78:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 9
200 // CHECK1-NEXT:    [[TMP79:%.*]] = bitcast i8** [[TMP78]] to i32***
201 // CHECK1-NEXT:    store i32** [[TMP27]], i32*** [[TMP79]], align 8
202 // CHECK1-NEXT:    [[TMP80:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 9
203 // CHECK1-NEXT:    store i8* null, i8** [[TMP80]], align 8
204 // CHECK1-NEXT:    [[TMP81:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 10
205 // CHECK1-NEXT:    [[TMP82:%.*]] = bitcast i8** [[TMP81]] to i32***
206 // CHECK1-NEXT:    store i32** [[TMP28]], i32*** [[TMP82]], align 8
207 // CHECK1-NEXT:    [[TMP83:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 10
208 // CHECK1-NEXT:    [[TMP84:%.*]] = bitcast i8** [[TMP83]] to i32**
209 // CHECK1-NEXT:    store i32* [[TMP30]], i32** [[TMP84]], align 8
210 // CHECK1-NEXT:    [[TMP85:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 10
211 // CHECK1-NEXT:    store i8* null, i8** [[TMP85]], align 8
212 // CHECK1-NEXT:    [[TMP86:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
213 // CHECK1-NEXT:    [[TMP87:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
214 // CHECK1-NEXT:    [[TMP88:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1:[0-9]+]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41.region_id, i32 11, i8** [[TMP86]], i8** [[TMP87]], i64* getelementptr inbounds ([11 x i64], [11 x i64]* @.offload_sizes, i32 0, i32 0), i64* getelementptr inbounds ([11 x i64], [11 x i64]* @.offload_maptypes, i32 0, i32 0), i8** null, i8** null)
215 // CHECK1-NEXT:    [[TMP89:%.*]] = icmp ne i32 [[TMP88]], 0
216 // CHECK1-NEXT:    br i1 [[TMP89]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
217 // CHECK1:       omp_offload.failed:
218 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41(i64 [[TMP11]], i32* [[TMP12]], i32* [[TMP13]], i32* [[TMP14]], i32* [[A]], %class.anon* [[TMP15]]) #[[ATTR4:[0-9]+]]
219 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
220 // CHECK1:       omp_offload.cont:
221 // CHECK1-NEXT:    [[TMP90:%.*]] = load i32*, i32** [[B]], align 8
222 // CHECK1-NEXT:    store i32* [[TMP90]], i32** [[_TMP4]], align 8
223 // CHECK1-NEXT:    [[TMP91:%.*]] = load i32*, i32** [[C]], align 8
224 // CHECK1-NEXT:    store i32* [[TMP91]], i32** [[_TMP5]], align 8
225 // CHECK1-NEXT:    [[TMP92:%.*]] = load %class.anon*, %class.anon** [[L]], align 8
226 // CHECK1-NEXT:    store %class.anon* [[TMP92]], %class.anon** [[_TMP6]], align 8
227 // CHECK1-NEXT:    [[TMP93:%.*]] = load i32*, i32** [[_TMP4]], align 8
228 // CHECK1-NEXT:    [[TMP94:%.*]] = load i32*, i32** [[_TMP5]], align 8
229 // CHECK1-NEXT:    [[TMP95:%.*]] = load i32*, i32** [[D]], align 8
230 // CHECK1-NEXT:    [[TMP96:%.*]] = load %class.anon*, %class.anon** [[_TMP6]], align 8
231 // CHECK1-NEXT:    [[TMP97:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP96]], i32 0, i32 0
232 // CHECK1-NEXT:    [[TMP98:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP96]], i32 0, i32 0
233 // CHECK1-NEXT:    [[TMP99:%.*]] = load i32*, i32** [[TMP98]], align 8
234 // CHECK1-NEXT:    [[TMP100:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP96]], i32 0, i32 1
235 // CHECK1-NEXT:    [[TMP101:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP96]], i32 0, i32 1
236 // CHECK1-NEXT:    [[TMP102:%.*]] = load i32*, i32** [[TMP101]], align 8
237 // CHECK1-NEXT:    [[TMP103:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP96]], i32 0, i32 2
238 // CHECK1-NEXT:    [[TMP104:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP96]], i32 0, i32 2
239 // CHECK1-NEXT:    [[TMP105:%.*]] = load i32*, i32** [[TMP104]], align 8
240 // CHECK1-NEXT:    [[TMP106:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP96]], i32 0, i32 3
241 // CHECK1-NEXT:    [[TMP107:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP96]], i32 0, i32 3
242 // CHECK1-NEXT:    [[TMP108:%.*]] = load i32**, i32*** [[TMP107]], align 8
243 // CHECK1-NEXT:    [[TMP109:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP96]], i32 0, i32 4
244 // CHECK1-NEXT:    [[TMP110:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP96]], i32 0, i32 4
245 // CHECK1-NEXT:    [[TMP111:%.*]] = load i32*, i32** [[TMP110]], align 8
246 // CHECK1-NEXT:    [[TMP112:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
247 // CHECK1-NEXT:    [[TMP113:%.*]] = bitcast i8** [[TMP112]] to i32**
248 // CHECK1-NEXT:    store i32* [[ARGC_ADDR]], i32** [[TMP113]], align 8
249 // CHECK1-NEXT:    [[TMP114:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
250 // CHECK1-NEXT:    [[TMP115:%.*]] = bitcast i8** [[TMP114]] to i32**
251 // CHECK1-NEXT:    store i32* [[ARGC_ADDR]], i32** [[TMP115]], align 8
252 // CHECK1-NEXT:    [[TMP116:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 0
253 // CHECK1-NEXT:    store i8* null, i8** [[TMP116]], align 8
254 // CHECK1-NEXT:    [[TMP117:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 1
255 // CHECK1-NEXT:    [[TMP118:%.*]] = bitcast i8** [[TMP117]] to i32**
256 // CHECK1-NEXT:    store i32* [[TMP93]], i32** [[TMP118]], align 8
257 // CHECK1-NEXT:    [[TMP119:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 1
258 // CHECK1-NEXT:    [[TMP120:%.*]] = bitcast i8** [[TMP119]] to i32**
259 // CHECK1-NEXT:    store i32* [[TMP93]], i32** [[TMP120]], align 8
260 // CHECK1-NEXT:    [[TMP121:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 1
261 // CHECK1-NEXT:    store i8* null, i8** [[TMP121]], align 8
262 // CHECK1-NEXT:    [[TMP122:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 2
263 // CHECK1-NEXT:    [[TMP123:%.*]] = bitcast i8** [[TMP122]] to i32**
264 // CHECK1-NEXT:    store i32* [[TMP94]], i32** [[TMP123]], align 8
265 // CHECK1-NEXT:    [[TMP124:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 2
266 // CHECK1-NEXT:    [[TMP125:%.*]] = bitcast i8** [[TMP124]] to i32**
267 // CHECK1-NEXT:    store i32* [[TMP94]], i32** [[TMP125]], align 8
268 // CHECK1-NEXT:    [[TMP126:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 2
269 // CHECK1-NEXT:    store i8* null, i8** [[TMP126]], align 8
270 // CHECK1-NEXT:    [[TMP127:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 3
271 // CHECK1-NEXT:    [[TMP128:%.*]] = bitcast i8** [[TMP127]] to i32**
272 // CHECK1-NEXT:    store i32* [[TMP95]], i32** [[TMP128]], align 8
273 // CHECK1-NEXT:    [[TMP129:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 3
274 // CHECK1-NEXT:    [[TMP130:%.*]] = bitcast i8** [[TMP129]] to i32**
275 // CHECK1-NEXT:    store i32* [[TMP95]], i32** [[TMP130]], align 8
276 // CHECK1-NEXT:    [[TMP131:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 3
277 // CHECK1-NEXT:    store i8* null, i8** [[TMP131]], align 8
278 // CHECK1-NEXT:    [[TMP132:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 4
279 // CHECK1-NEXT:    [[TMP133:%.*]] = bitcast i8** [[TMP132]] to i32**
280 // CHECK1-NEXT:    store i32* [[A]], i32** [[TMP133]], align 8
281 // CHECK1-NEXT:    [[TMP134:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 4
282 // CHECK1-NEXT:    [[TMP135:%.*]] = bitcast i8** [[TMP134]] to i32**
283 // CHECK1-NEXT:    store i32* [[A]], i32** [[TMP135]], align 8
284 // CHECK1-NEXT:    [[TMP136:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 4
285 // CHECK1-NEXT:    store i8* null, i8** [[TMP136]], align 8
286 // CHECK1-NEXT:    [[TMP137:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 5
287 // CHECK1-NEXT:    [[TMP138:%.*]] = bitcast i8** [[TMP137]] to %class.anon**
288 // CHECK1-NEXT:    store %class.anon* [[TMP96]], %class.anon** [[TMP138]], align 8
289 // CHECK1-NEXT:    [[TMP139:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 5
290 // CHECK1-NEXT:    [[TMP140:%.*]] = bitcast i8** [[TMP139]] to %class.anon**
291 // CHECK1-NEXT:    store %class.anon* [[TMP96]], %class.anon** [[TMP140]], align 8
292 // CHECK1-NEXT:    [[TMP141:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 5
293 // CHECK1-NEXT:    store i8* null, i8** [[TMP141]], align 8
294 // CHECK1-NEXT:    [[TMP142:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 6
295 // CHECK1-NEXT:    [[TMP143:%.*]] = bitcast i8** [[TMP142]] to i32***
296 // CHECK1-NEXT:    store i32** [[TMP97]], i32*** [[TMP143]], align 8
297 // CHECK1-NEXT:    [[TMP144:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 6
298 // CHECK1-NEXT:    [[TMP145:%.*]] = bitcast i8** [[TMP144]] to i32**
299 // CHECK1-NEXT:    store i32* [[TMP99]], i32** [[TMP145]], align 8
300 // CHECK1-NEXT:    [[TMP146:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 6
301 // CHECK1-NEXT:    store i8* null, i8** [[TMP146]], align 8
302 // CHECK1-NEXT:    [[TMP147:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 7
303 // CHECK1-NEXT:    [[TMP148:%.*]] = bitcast i8** [[TMP147]] to i32***
304 // CHECK1-NEXT:    store i32** [[TMP100]], i32*** [[TMP148]], align 8
305 // CHECK1-NEXT:    [[TMP149:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 7
306 // CHECK1-NEXT:    [[TMP150:%.*]] = bitcast i8** [[TMP149]] to i32**
307 // CHECK1-NEXT:    store i32* [[TMP102]], i32** [[TMP150]], align 8
308 // CHECK1-NEXT:    [[TMP151:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 7
309 // CHECK1-NEXT:    store i8* null, i8** [[TMP151]], align 8
310 // CHECK1-NEXT:    [[TMP152:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 8
311 // CHECK1-NEXT:    [[TMP153:%.*]] = bitcast i8** [[TMP152]] to i32***
312 // CHECK1-NEXT:    store i32** [[TMP103]], i32*** [[TMP153]], align 8
313 // CHECK1-NEXT:    [[TMP154:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 8
314 // CHECK1-NEXT:    [[TMP155:%.*]] = bitcast i8** [[TMP154]] to i32**
315 // CHECK1-NEXT:    store i32* [[TMP105]], i32** [[TMP155]], align 8
316 // CHECK1-NEXT:    [[TMP156:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 8
317 // CHECK1-NEXT:    store i8* null, i8** [[TMP156]], align 8
318 // CHECK1-NEXT:    [[TMP157:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 9
319 // CHECK1-NEXT:    [[TMP158:%.*]] = bitcast i8** [[TMP157]] to i32****
320 // CHECK1-NEXT:    store i32*** [[TMP106]], i32**** [[TMP158]], align 8
321 // CHECK1-NEXT:    [[TMP159:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 9
322 // CHECK1-NEXT:    [[TMP160:%.*]] = bitcast i8** [[TMP159]] to i32***
323 // CHECK1-NEXT:    store i32** [[TMP108]], i32*** [[TMP160]], align 8
324 // CHECK1-NEXT:    [[TMP161:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 9
325 // CHECK1-NEXT:    store i8* null, i8** [[TMP161]], align 8
326 // CHECK1-NEXT:    [[TMP162:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 10
327 // CHECK1-NEXT:    [[TMP163:%.*]] = bitcast i8** [[TMP162]] to i32***
328 // CHECK1-NEXT:    store i32** [[TMP109]], i32*** [[TMP163]], align 8
329 // CHECK1-NEXT:    [[TMP164:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 10
330 // CHECK1-NEXT:    [[TMP165:%.*]] = bitcast i8** [[TMP164]] to i32**
331 // CHECK1-NEXT:    store i32* [[TMP111]], i32** [[TMP165]], align 8
332 // CHECK1-NEXT:    [[TMP166:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_MAPPERS9]], i64 0, i64 10
333 // CHECK1-NEXT:    store i8* null, i8** [[TMP166]], align 8
334 // CHECK1-NEXT:    [[TMP167:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0
335 // CHECK1-NEXT:    [[TMP168:%.*]] = getelementptr inbounds [11 x i8*], [11 x i8*]* [[DOTOFFLOAD_PTRS8]], i32 0, i32 0
336 // CHECK1-NEXT:    [[TMP169:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l43.region_id, i32 11, i8** [[TMP167]], i8** [[TMP168]], i64* getelementptr inbounds ([11 x i64], [11 x i64]* @.offload_sizes.1, i32 0, i32 0), i64* getelementptr inbounds ([11 x i64], [11 x i64]* @.offload_maptypes.2, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
337 // CHECK1-NEXT:    [[TMP170:%.*]] = icmp ne i32 [[TMP169]], 0
338 // CHECK1-NEXT:    br i1 [[TMP170]], label [[OMP_OFFLOAD_FAILED10:%.*]], label [[OMP_OFFLOAD_CONT11:%.*]]
339 // CHECK1:       omp_offload.failed10:
340 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l43(i32* [[ARGC_ADDR]], i32* [[TMP93]], i32* [[TMP94]], i32* [[TMP95]], i32* [[A]], %class.anon* [[TMP96]]) #[[ATTR4]]
341 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT11]]
342 // CHECK1:       omp_offload.cont11:
343 // CHECK1-NEXT:    [[TMP171:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
344 // CHECK1-NEXT:    [[CALL:%.*]] = call signext i32 @_ZN1S3fooEv(%struct.S* nonnull align 4 dereferenceable(4) @s)
345 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP171]], [[CALL]]
346 // CHECK1-NEXT:    ret i32 [[ADD]]
347 //
348 //
349 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41
350 // CHECK1-SAME: (i64 [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[B:%.*]], i32* nonnull align 4 dereferenceable(4) [[C:%.*]], i32* [[D:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], %class.anon* nonnull align 8 dereferenceable(40) [[L:%.*]]) #[[ATTR1:[0-9]+]] {
351 // CHECK1-NEXT:  entry:
352 // CHECK1-NEXT:    [[ARGC_ADDR:%.*]] = alloca i64, align 8
353 // CHECK1-NEXT:    [[B_ADDR:%.*]] = alloca i32*, align 8
354 // CHECK1-NEXT:    [[C_ADDR:%.*]] = alloca i32*, align 8
355 // CHECK1-NEXT:    [[D_ADDR:%.*]] = alloca i32*, align 8
356 // CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32*, align 8
357 // CHECK1-NEXT:    [[L_ADDR:%.*]] = alloca %class.anon*, align 8
358 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32*, align 8
359 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32*, align 8
360 // CHECK1-NEXT:    [[_TMP2:%.*]] = alloca %class.anon*, align 8
361 // CHECK1-NEXT:    [[L3:%.*]] = alloca [[CLASS_ANON:%.*]], align 8
362 // CHECK1-NEXT:    [[_TMP4:%.*]] = alloca %class.anon*, align 8
363 // CHECK1-NEXT:    [[B5:%.*]] = alloca i32, align 4
364 // CHECK1-NEXT:    [[_TMP6:%.*]] = alloca i32*, align 8
365 // CHECK1-NEXT:    [[C7:%.*]] = alloca i32, align 4
366 // CHECK1-NEXT:    [[_TMP8:%.*]] = alloca i32*, align 8
367 // CHECK1-NEXT:    store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8
368 // CHECK1-NEXT:    store i32* [[B]], i32** [[B_ADDR]], align 8
369 // CHECK1-NEXT:    store i32* [[C]], i32** [[C_ADDR]], align 8
370 // CHECK1-NEXT:    store i32* [[D]], i32** [[D_ADDR]], align 8
371 // CHECK1-NEXT:    store i32* [[A]], i32** [[A_ADDR]], align 8
372 // CHECK1-NEXT:    store %class.anon* [[L]], %class.anon** [[L_ADDR]], align 8
373 // CHECK1-NEXT:    [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32*
374 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[B_ADDR]], align 8
375 // CHECK1-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[C_ADDR]], align 8
376 // CHECK1-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8
377 // CHECK1-NEXT:    [[TMP3:%.*]] = load %class.anon*, %class.anon** [[L_ADDR]], align 8
378 // CHECK1-NEXT:    store i32* [[TMP0]], i32** [[TMP]], align 8
379 // CHECK1-NEXT:    store i32* [[TMP1]], i32** [[_TMP1]], align 8
380 // CHECK1-NEXT:    store %class.anon* [[TMP3]], %class.anon** [[_TMP2]], align 8
381 // CHECK1-NEXT:    [[TMP4:%.*]] = load %class.anon*, %class.anon** [[_TMP2]], align 8
382 // CHECK1-NEXT:    [[TMP5:%.*]] = bitcast %class.anon* [[L3]] to i8*
383 // CHECK1-NEXT:    [[TMP6:%.*]] = bitcast %class.anon* [[TMP4]] to i8*
384 // CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP5]], i8* align 8 [[TMP6]], i64 40, i1 false)
385 // CHECK1-NEXT:    store %class.anon* [[L3]], %class.anon** [[_TMP4]], align 8
386 // CHECK1-NEXT:    [[TMP7:%.*]] = load i32*, i32** [[TMP]], align 8
387 // CHECK1-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4
388 // CHECK1-NEXT:    store i32 [[TMP8]], i32* [[B5]], align 4
389 // CHECK1-NEXT:    store i32* [[B5]], i32** [[_TMP6]], align 8
390 // CHECK1-NEXT:    [[TMP9:%.*]] = load i32*, i32** [[_TMP1]], align 8
391 // CHECK1-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4
392 // CHECK1-NEXT:    store i32 [[TMP10]], i32* [[C7]], align 4
393 // CHECK1-NEXT:    store i32* [[C7]], i32** [[_TMP8]], align 8
394 // CHECK1-NEXT:    [[TMP11:%.*]] = load %class.anon*, %class.anon** [[_TMP4]], align 8
395 // CHECK1-NEXT:    [[CALL:%.*]] = call i64 @"_ZZ4mainENK3$_0clEv"(%class.anon* nonnull align 8 dereferenceable(40) [[TMP11]])
396 // CHECK1-NEXT:    ret void
397 //
398 //
399 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l43
400 // CHECK1-SAME: (i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[B:%.*]], i32* nonnull align 4 dereferenceable(4) [[C:%.*]], i32* [[D:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], %class.anon* nonnull align 8 dereferenceable(40) [[L:%.*]]) #[[ATTR1]] {
401 // CHECK1-NEXT:  entry:
402 // CHECK1-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32*, align 8
403 // CHECK1-NEXT:    [[B_ADDR:%.*]] = alloca i32*, align 8
404 // CHECK1-NEXT:    [[C_ADDR:%.*]] = alloca i32*, align 8
405 // CHECK1-NEXT:    [[D_ADDR:%.*]] = alloca i32*, align 8
406 // CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32*, align 8
407 // CHECK1-NEXT:    [[L_ADDR:%.*]] = alloca %class.anon*, align 8
408 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32*, align 8
409 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32*, align 8
410 // CHECK1-NEXT:    [[_TMP2:%.*]] = alloca %class.anon*, align 8
411 // CHECK1-NEXT:    store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8
412 // CHECK1-NEXT:    store i32* [[B]], i32** [[B_ADDR]], align 8
413 // CHECK1-NEXT:    store i32* [[C]], i32** [[C_ADDR]], align 8
414 // CHECK1-NEXT:    store i32* [[D]], i32** [[D_ADDR]], align 8
415 // CHECK1-NEXT:    store i32* [[A]], i32** [[A_ADDR]], align 8
416 // CHECK1-NEXT:    store %class.anon* [[L]], %class.anon** [[L_ADDR]], align 8
417 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8
418 // CHECK1-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[B_ADDR]], align 8
419 // CHECK1-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[C_ADDR]], align 8
420 // CHECK1-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[A_ADDR]], align 8
421 // CHECK1-NEXT:    [[TMP4:%.*]] = load %class.anon*, %class.anon** [[L_ADDR]], align 8
422 // CHECK1-NEXT:    store i32* [[TMP1]], i32** [[TMP]], align 8
423 // CHECK1-NEXT:    store i32* [[TMP2]], i32** [[_TMP1]], align 8
424 // CHECK1-NEXT:    store %class.anon* [[TMP4]], %class.anon** [[_TMP2]], align 8
425 // CHECK1-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[TMP]], align 8
426 // CHECK1-NEXT:    [[TMP6:%.*]] = load i32*, i32** [[_TMP1]], align 8
427 // CHECK1-NEXT:    [[TMP7:%.*]] = load i32*, i32** [[D_ADDR]], align 8
428 // CHECK1-NEXT:    [[TMP8:%.*]] = load %class.anon*, %class.anon** [[_TMP2]], align 8
429 // CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*, i32*, i32*, %class.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), i32* [[TMP0]], i32* [[TMP5]], i32* [[TMP6]], i32* [[TMP7]], i32* [[TMP3]], %class.anon* [[TMP8]])
430 // CHECK1-NEXT:    ret void
431 //
432 //
433 // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined.
434 // CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[B:%.*]], i32* nonnull align 4 dereferenceable(4) [[C:%.*]], i32* [[D:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], %class.anon* nonnull align 8 dereferenceable(40) [[L:%.*]]) #[[ATTR1]] {
435 // CHECK1-NEXT:  entry:
436 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
437 // CHECK1-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
438 // CHECK1-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32*, align 8
439 // CHECK1-NEXT:    [[B_ADDR:%.*]] = alloca i32*, align 8
440 // CHECK1-NEXT:    [[C_ADDR:%.*]] = alloca i32*, align 8
441 // CHECK1-NEXT:    [[D_ADDR:%.*]] = alloca i32*, align 8
442 // CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32*, align 8
443 // CHECK1-NEXT:    [[L_ADDR:%.*]] = alloca %class.anon*, align 8
444 // CHECK1-NEXT:    [[TMP:%.*]] = alloca i32*, align 8
445 // CHECK1-NEXT:    [[_TMP1:%.*]] = alloca i32*, align 8
446 // CHECK1-NEXT:    [[_TMP2:%.*]] = alloca %class.anon*, align 8
447 // CHECK1-NEXT:    [[L3:%.*]] = alloca [[CLASS_ANON:%.*]], align 8
448 // CHECK1-NEXT:    [[_TMP4:%.*]] = alloca %class.anon*, align 8
449 // CHECK1-NEXT:    [[ARGC5:%.*]] = alloca i32, align 4
450 // CHECK1-NEXT:    [[B6:%.*]] = alloca i32, align 4
451 // CHECK1-NEXT:    [[_TMP7:%.*]] = alloca i32*, align 8
452 // CHECK1-NEXT:    [[C8:%.*]] = alloca i32, align 4
453 // CHECK1-NEXT:    [[_TMP9:%.*]] = alloca i32*, align 8
454 // CHECK1-NEXT:    [[A10:%.*]] = alloca i32, align 4
455 // CHECK1-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
456 // CHECK1-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
457 // CHECK1-NEXT:    store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8
458 // CHECK1-NEXT:    store i32* [[B]], i32** [[B_ADDR]], align 8
459 // CHECK1-NEXT:    store i32* [[C]], i32** [[C_ADDR]], align 8
460 // CHECK1-NEXT:    store i32* [[D]], i32** [[D_ADDR]], align 8
461 // CHECK1-NEXT:    store i32* [[A]], i32** [[A_ADDR]], align 8
462 // CHECK1-NEXT:    store %class.anon* [[L]], %class.anon** [[L_ADDR]], align 8
463 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8
464 // CHECK1-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[B_ADDR]], align 8
465 // CHECK1-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[C_ADDR]], align 8
466 // CHECK1-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[A_ADDR]], align 8
467 // CHECK1-NEXT:    [[TMP4:%.*]] = load %class.anon*, %class.anon** [[L_ADDR]], align 8
468 // CHECK1-NEXT:    store i32* [[TMP1]], i32** [[TMP]], align 8
469 // CHECK1-NEXT:    store i32* [[TMP2]], i32** [[_TMP1]], align 8
470 // CHECK1-NEXT:    store %class.anon* [[TMP4]], %class.anon** [[_TMP2]], align 8
471 // CHECK1-NEXT:    [[TMP5:%.*]] = load %class.anon*, %class.anon** [[_TMP2]], align 8
472 // CHECK1-NEXT:    [[TMP6:%.*]] = bitcast %class.anon* [[L3]] to i8*
473 // CHECK1-NEXT:    [[TMP7:%.*]] = bitcast %class.anon* [[TMP5]] to i8*
474 // CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 [[TMP7]], i64 40, i1 false)
475 // CHECK1-NEXT:    store %class.anon* [[L3]], %class.anon** [[_TMP4]], align 8
476 // CHECK1-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP0]], align 4
477 // CHECK1-NEXT:    store i32 [[TMP8]], i32* [[ARGC5]], align 4
478 // CHECK1-NEXT:    [[TMP9:%.*]] = load i32*, i32** [[TMP]], align 8
479 // CHECK1-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4
480 // CHECK1-NEXT:    store i32 [[TMP10]], i32* [[B6]], align 4
481 // CHECK1-NEXT:    store i32* [[B6]], i32** [[_TMP7]], align 8
482 // CHECK1-NEXT:    [[TMP11:%.*]] = load i32*, i32** [[_TMP1]], align 8
483 // CHECK1-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
484 // CHECK1-NEXT:    store i32 [[TMP12]], i32* [[C8]], align 4
485 // CHECK1-NEXT:    store i32* [[C8]], i32** [[_TMP9]], align 8
486 // CHECK1-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP3]], align 4
487 // CHECK1-NEXT:    store i32 [[TMP13]], i32* [[A10]], align 4
488 // CHECK1-NEXT:    [[TMP14:%.*]] = load %class.anon*, %class.anon** [[_TMP4]], align 8
489 // CHECK1-NEXT:    [[CALL:%.*]] = call i64 @"_ZZ4mainENK3$_0clEv"(%class.anon* nonnull align 8 dereferenceable(40) [[TMP14]])
490 // CHECK1-NEXT:    ret void
491 //
492 //
493 // CHECK1-LABEL: define {{[^@]+}}@_ZN1S3fooEv
494 // CHECK1-SAME: (%struct.S* nonnull align 4 dereferenceable(4) [[THIS:%.*]]) #[[ATTR3:[0-9]+]] comdat align 2 {
495 // CHECK1-NEXT:  entry:
496 // CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
497 // CHECK1-NEXT:    [[L:%.*]] = alloca %class.anon.0*, align 8
498 // CHECK1-NEXT:    [[REF_TMP:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
499 // CHECK1-NEXT:    [[TMP:%.*]] = alloca %class.anon.0*, align 8
500 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [3 x i8*], align 8
501 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [3 x i8*], align 8
502 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [3 x i8*], align 8
503 // CHECK1-NEXT:    [[_TMP2:%.*]] = alloca %class.anon.0*, align 8
504 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS3:%.*]] = alloca [3 x i8*], align 8
505 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS4:%.*]] = alloca [3 x i8*], align 8
506 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS5:%.*]] = alloca [3 x i8*], align 8
507 // CHECK1-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
508 // CHECK1-NEXT:    [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
509 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0
510 // CHECK1-NEXT:    store %struct.S* [[THIS1]], %struct.S** [[TMP0]], align 8
511 // CHECK1-NEXT:    store %class.anon.0* [[REF_TMP]], %class.anon.0** [[L]], align 8
512 // CHECK1-NEXT:    [[TMP1:%.*]] = load %class.anon.0*, %class.anon.0** [[L]], align 8
513 // CHECK1-NEXT:    store %class.anon.0* [[TMP1]], %class.anon.0** [[TMP]], align 8
514 // CHECK1-NEXT:    [[TMP2:%.*]] = load %class.anon.0*, %class.anon.0** [[TMP]], align 8
515 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[TMP2]], i32 0, i32 0
516 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[TMP2]], i32 0, i32 0
517 // CHECK1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
518 // CHECK1-NEXT:    [[TMP6:%.*]] = bitcast i8** [[TMP5]] to %struct.S**
519 // CHECK1-NEXT:    store %struct.S* [[THIS1]], %struct.S** [[TMP6]], align 8
520 // CHECK1-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
521 // CHECK1-NEXT:    [[TMP8:%.*]] = bitcast i8** [[TMP7]] to %struct.S**
522 // CHECK1-NEXT:    store %struct.S* [[THIS1]], %struct.S** [[TMP8]], align 8
523 // CHECK1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
524 // CHECK1-NEXT:    store i8* null, i8** [[TMP9]], align 8
525 // CHECK1-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
526 // CHECK1-NEXT:    [[TMP11:%.*]] = bitcast i8** [[TMP10]] to %class.anon.0**
527 // CHECK1-NEXT:    store %class.anon.0* [[TMP2]], %class.anon.0** [[TMP11]], align 8
528 // CHECK1-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
529 // CHECK1-NEXT:    [[TMP13:%.*]] = bitcast i8** [[TMP12]] to %class.anon.0**
530 // CHECK1-NEXT:    store %class.anon.0* [[TMP2]], %class.anon.0** [[TMP13]], align 8
531 // CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
532 // CHECK1-NEXT:    store i8* null, i8** [[TMP14]], align 8
533 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 2
534 // CHECK1-NEXT:    [[TMP16:%.*]] = bitcast i8** [[TMP15]] to %struct.S***
535 // CHECK1-NEXT:    store %struct.S** [[TMP3]], %struct.S*** [[TMP16]], align 8
536 // CHECK1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 2
537 // CHECK1-NEXT:    [[TMP18:%.*]] = bitcast i8** [[TMP17]] to %struct.S***
538 // CHECK1-NEXT:    store %struct.S** [[TMP4]], %struct.S*** [[TMP18]], align 8
539 // CHECK1-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 2
540 // CHECK1-NEXT:    store i8* null, i8** [[TMP19]], align 8
541 // CHECK1-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
542 // CHECK1-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
543 // CHECK1-NEXT:    [[TMP22:%.*]] = call i32 @__tgt_target_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l27.region_id, i32 3, i8** [[TMP20]], i8** [[TMP21]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.3, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.4, i32 0, i32 0), i8** null, i8** null)
544 // CHECK1-NEXT:    [[TMP23:%.*]] = icmp ne i32 [[TMP22]], 0
545 // CHECK1-NEXT:    br i1 [[TMP23]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
546 // CHECK1:       omp_offload.failed:
547 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l27(%struct.S* [[THIS1]], %class.anon.0* [[TMP2]]) #[[ATTR4]]
548 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
549 // CHECK1:       omp_offload.cont:
550 // CHECK1-NEXT:    [[TMP24:%.*]] = load %class.anon.0*, %class.anon.0** [[L]], align 8
551 // CHECK1-NEXT:    store %class.anon.0* [[TMP24]], %class.anon.0** [[_TMP2]], align 8
552 // CHECK1-NEXT:    [[TMP25:%.*]] = load %class.anon.0*, %class.anon.0** [[_TMP2]], align 8
553 // CHECK1-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[TMP25]], i32 0, i32 0
554 // CHECK1-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[TMP25]], i32 0, i32 0
555 // CHECK1-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
556 // CHECK1-NEXT:    [[TMP29:%.*]] = bitcast i8** [[TMP28]] to %struct.S**
557 // CHECK1-NEXT:    store %struct.S* [[THIS1]], %struct.S** [[TMP29]], align 8
558 // CHECK1-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
559 // CHECK1-NEXT:    [[TMP31:%.*]] = bitcast i8** [[TMP30]] to %struct.S**
560 // CHECK1-NEXT:    store %struct.S* [[THIS1]], %struct.S** [[TMP31]], align 8
561 // CHECK1-NEXT:    [[TMP32:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS5]], i64 0, i64 0
562 // CHECK1-NEXT:    store i8* null, i8** [[TMP32]], align 8
563 // CHECK1-NEXT:    [[TMP33:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 1
564 // CHECK1-NEXT:    [[TMP34:%.*]] = bitcast i8** [[TMP33]] to %class.anon.0**
565 // CHECK1-NEXT:    store %class.anon.0* [[TMP25]], %class.anon.0** [[TMP34]], align 8
566 // CHECK1-NEXT:    [[TMP35:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS4]], i32 0, i32 1
567 // CHECK1-NEXT:    [[TMP36:%.*]] = bitcast i8** [[TMP35]] to %class.anon.0**
568 // CHECK1-NEXT:    store %class.anon.0* [[TMP25]], %class.anon.0** [[TMP36]], align 8
569 // CHECK1-NEXT:    [[TMP37:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS5]], i64 0, i64 1
570 // CHECK1-NEXT:    store i8* null, i8** [[TMP37]], align 8
571 // CHECK1-NEXT:    [[TMP38:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 2
572 // CHECK1-NEXT:    [[TMP39:%.*]] = bitcast i8** [[TMP38]] to %struct.S***
573 // CHECK1-NEXT:    store %struct.S** [[TMP26]], %struct.S*** [[TMP39]], align 8
574 // CHECK1-NEXT:    [[TMP40:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS4]], i32 0, i32 2
575 // CHECK1-NEXT:    [[TMP41:%.*]] = bitcast i8** [[TMP40]] to %struct.S***
576 // CHECK1-NEXT:    store %struct.S** [[TMP27]], %struct.S*** [[TMP41]], align 8
577 // CHECK1-NEXT:    [[TMP42:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_MAPPERS5]], i64 0, i64 2
578 // CHECK1-NEXT:    store i8* null, i8** [[TMP42]], align 8
579 // CHECK1-NEXT:    [[TMP43:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0
580 // CHECK1-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[DOTOFFLOAD_PTRS4]], i32 0, i32 0
581 // CHECK1-NEXT:    [[TMP45:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l29.region_id, i32 3, i8** [[TMP43]], i8** [[TMP44]], i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_sizes.6, i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* @.offload_maptypes.7, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
582 // CHECK1-NEXT:    [[TMP46:%.*]] = icmp ne i32 [[TMP45]], 0
583 // CHECK1-NEXT:    br i1 [[TMP46]], label [[OMP_OFFLOAD_FAILED6:%.*]], label [[OMP_OFFLOAD_CONT7:%.*]]
584 // CHECK1:       omp_offload.failed6:
585 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l29(%struct.S* [[THIS1]], %class.anon.0* [[TMP25]]) #[[ATTR4]]
586 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT7]]
587 // CHECK1:       omp_offload.cont7:
588 // CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[THIS1]], i32 0, i32 0
589 // CHECK1-NEXT:    [[TMP47:%.*]] = load i32, i32* [[A]], align 4
590 // CHECK1-NEXT:    [[TMP48:%.*]] = load %class.anon.0*, %class.anon.0** [[L]], align 8
591 // CHECK1-NEXT:    [[CALL:%.*]] = call signext i32 @_Z3fooIZN1S3fooEvEUlvE_EiRKT_(%class.anon.0* nonnull align 8 dereferenceable(8) [[TMP48]])
592 // CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP47]], [[CALL]]
593 // CHECK1-NEXT:    ret i32 [[ADD]]
594 //
595 //
596 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l27
597 // CHECK1-SAME: (%struct.S* [[THIS:%.*]], %class.anon.0* nonnull align 8 dereferenceable(8) [[L:%.*]]) #[[ATTR1]] {
598 // CHECK1-NEXT:  entry:
599 // CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
600 // CHECK1-NEXT:    [[L_ADDR:%.*]] = alloca %class.anon.0*, align 8
601 // CHECK1-NEXT:    [[TMP:%.*]] = alloca %class.anon.0*, align 8
602 // CHECK1-NEXT:    [[L1:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
603 // CHECK1-NEXT:    [[_TMP2:%.*]] = alloca %class.anon.0*, align 8
604 // CHECK1-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
605 // CHECK1-NEXT:    store %class.anon.0* [[L]], %class.anon.0** [[L_ADDR]], align 8
606 // CHECK1-NEXT:    [[TMP0:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
607 // CHECK1-NEXT:    [[TMP1:%.*]] = load %class.anon.0*, %class.anon.0** [[L_ADDR]], align 8
608 // CHECK1-NEXT:    store %class.anon.0* [[TMP1]], %class.anon.0** [[TMP]], align 8
609 // CHECK1-NEXT:    [[TMP2:%.*]] = load %class.anon.0*, %class.anon.0** [[TMP]], align 8
610 // CHECK1-NEXT:    [[TMP3:%.*]] = bitcast %class.anon.0* [[L1]] to i8*
611 // CHECK1-NEXT:    [[TMP4:%.*]] = bitcast %class.anon.0* [[TMP2]] to i8*
612 // CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 8, i1 false)
613 // CHECK1-NEXT:    store %class.anon.0* [[L1]], %class.anon.0** [[_TMP2]], align 8
614 // CHECK1-NEXT:    [[TMP5:%.*]] = load %class.anon.0*, %class.anon.0** [[_TMP2]], align 8
615 // CHECK1-NEXT:    [[CALL:%.*]] = call signext i32 @_ZZN1S3fooEvENKUlvE_clEv(%class.anon.0* nonnull align 8 dereferenceable(8) [[TMP5]])
616 // CHECK1-NEXT:    ret void
617 //
618 //
619 // CHECK1-LABEL: define {{[^@]+}}@_ZZN1S3fooEvENKUlvE_clEv
620 // CHECK1-SAME: (%class.anon.0* nonnull align 8 dereferenceable(8) [[THIS:%.*]]) #[[ATTR3]] comdat align 2 {
621 // CHECK1-NEXT:  entry:
622 // CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %class.anon.0*, align 8
623 // CHECK1-NEXT:    store %class.anon.0* [[THIS]], %class.anon.0** [[THIS_ADDR]], align 8
624 // CHECK1-NEXT:    [[THIS1:%.*]] = load %class.anon.0*, %class.anon.0** [[THIS_ADDR]], align 8
625 // CHECK1-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], %class.anon.0* [[THIS1]], i32 0, i32 0
626 // CHECK1-NEXT:    [[TMP1:%.*]] = load %struct.S*, %struct.S** [[TMP0]], align 8
627 // CHECK1-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[TMP1]], i32 0, i32 0
628 // CHECK1-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
629 // CHECK1-NEXT:    ret i32 [[TMP2]]
630 //
631 //
632 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l29
633 // CHECK1-SAME: (%struct.S* [[THIS:%.*]], %class.anon.0* nonnull align 8 dereferenceable(8) [[L:%.*]]) #[[ATTR1]] {
634 // CHECK1-NEXT:  entry:
635 // CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
636 // CHECK1-NEXT:    [[L_ADDR:%.*]] = alloca %class.anon.0*, align 8
637 // CHECK1-NEXT:    [[TMP:%.*]] = alloca %class.anon.0*, align 8
638 // CHECK1-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
639 // CHECK1-NEXT:    store %class.anon.0* [[L]], %class.anon.0** [[L_ADDR]], align 8
640 // CHECK1-NEXT:    [[TMP0:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
641 // CHECK1-NEXT:    [[TMP1:%.*]] = load %class.anon.0*, %class.anon.0** [[L_ADDR]], align 8
642 // CHECK1-NEXT:    store %class.anon.0* [[TMP1]], %class.anon.0** [[TMP]], align 8
643 // CHECK1-NEXT:    [[TMP2:%.*]] = load %class.anon.0*, %class.anon.0** [[TMP]], align 8
644 // CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.S*, %class.anon.0*)* @.omp_outlined..5 to void (i32*, i32*, ...)*), %struct.S* [[TMP0]], %class.anon.0* [[TMP2]])
645 // CHECK1-NEXT:    ret void
646 //
647 //
648 // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..5
649 // CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.S* [[THIS:%.*]], %class.anon.0* nonnull align 8 dereferenceable(8) [[L:%.*]]) #[[ATTR1]] {
650 // CHECK1-NEXT:  entry:
651 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
652 // CHECK1-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
653 // CHECK1-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
654 // CHECK1-NEXT:    [[L_ADDR:%.*]] = alloca %class.anon.0*, align 8
655 // CHECK1-NEXT:    [[TMP:%.*]] = alloca %class.anon.0*, align 8
656 // CHECK1-NEXT:    [[L1:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
657 // CHECK1-NEXT:    [[_TMP2:%.*]] = alloca %class.anon.0*, align 8
658 // CHECK1-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
659 // CHECK1-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
660 // CHECK1-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
661 // CHECK1-NEXT:    store %class.anon.0* [[L]], %class.anon.0** [[L_ADDR]], align 8
662 // CHECK1-NEXT:    [[TMP0:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
663 // CHECK1-NEXT:    [[TMP1:%.*]] = load %class.anon.0*, %class.anon.0** [[L_ADDR]], align 8
664 // CHECK1-NEXT:    store %class.anon.0* [[TMP1]], %class.anon.0** [[TMP]], align 8
665 // CHECK1-NEXT:    [[TMP2:%.*]] = load %class.anon.0*, %class.anon.0** [[TMP]], align 8
666 // CHECK1-NEXT:    [[TMP3:%.*]] = bitcast %class.anon.0* [[L1]] to i8*
667 // CHECK1-NEXT:    [[TMP4:%.*]] = bitcast %class.anon.0* [[TMP2]] to i8*
668 // CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 8, i1 false)
669 // CHECK1-NEXT:    store %class.anon.0* [[L1]], %class.anon.0** [[_TMP2]], align 8
670 // CHECK1-NEXT:    [[TMP5:%.*]] = load %class.anon.0*, %class.anon.0** [[_TMP2]], align 8
671 // CHECK1-NEXT:    [[CALL:%.*]] = call signext i32 @_ZZN1S3fooEvENKUlvE_clEv(%class.anon.0* nonnull align 8 dereferenceable(8) [[TMP5]])
672 // CHECK1-NEXT:    ret void
673 //
674 //
675 // CHECK1-LABEL: define {{[^@]+}}@_Z3fooIZN1S3fooEvEUlvE_EiRKT_
676 // CHECK1-SAME: (%class.anon.0* nonnull align 8 dereferenceable(8) [[T:%.*]]) #[[ATTR3]] comdat {
677 // CHECK1-NEXT:  entry:
678 // CHECK1-NEXT:    [[T_ADDR:%.*]] = alloca %class.anon.0*, align 8
679 // CHECK1-NEXT:    [[TMP:%.*]] = alloca %class.anon.0*, align 8
680 // CHECK1-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 8
681 // CHECK1-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 8
682 // CHECK1-NEXT:    [[DOTOFFLOAD_MAPPERS:%.*]] = alloca [2 x i8*], align 8
683 // CHECK1-NEXT:    store %class.anon.0* [[T]], %class.anon.0** [[T_ADDR]], align 8
684 // CHECK1-NEXT:    [[TMP0:%.*]] = load %class.anon.0*, %class.anon.0** [[T_ADDR]], align 8
685 // CHECK1-NEXT:    store %class.anon.0* [[TMP0]], %class.anon.0** [[TMP]], align 8
686 // CHECK1-NEXT:    [[TMP1:%.*]] = load %class.anon.0*, %class.anon.0** [[TMP]], align 8
687 // CHECK1-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], %class.anon.0* [[TMP1]], i32 0, i32 0
688 // CHECK1-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[TMP1]], i32 0, i32 0
689 // CHECK1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
690 // CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i8** [[TMP4]] to %class.anon.0**
691 // CHECK1-NEXT:    store %class.anon.0* [[TMP1]], %class.anon.0** [[TMP5]], align 8
692 // CHECK1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
693 // CHECK1-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to %class.anon.0**
694 // CHECK1-NEXT:    store %class.anon.0* [[TMP1]], %class.anon.0** [[TMP7]], align 8
695 // CHECK1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 0
696 // CHECK1-NEXT:    store i8* null, i8** [[TMP8]], align 8
697 // CHECK1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 1
698 // CHECK1-NEXT:    [[TMP10:%.*]] = bitcast i8** [[TMP9]] to %struct.S***
699 // CHECK1-NEXT:    store %struct.S** [[TMP2]], %struct.S*** [[TMP10]], align 8
700 // CHECK1-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 1
701 // CHECK1-NEXT:    [[TMP12:%.*]] = bitcast i8** [[TMP11]] to %struct.S***
702 // CHECK1-NEXT:    store %struct.S** [[TMP3]], %struct.S*** [[TMP12]], align 8
703 // CHECK1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_MAPPERS]], i64 0, i64 1
704 // CHECK1-NEXT:    store i8* null, i8** [[TMP13]], align 8
705 // CHECK1-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0
706 // CHECK1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i32 0, i32 0
707 // CHECK1-NEXT:    [[TMP16:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIZN1S3fooEvEUlvE_EiRKT__l18.region_id, i32 2, i8** [[TMP14]], i8** [[TMP15]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.9, i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.10, i32 0, i32 0), i8** null, i8** null, i32 1, i32 0)
708 // CHECK1-NEXT:    [[TMP17:%.*]] = icmp ne i32 [[TMP16]], 0
709 // CHECK1-NEXT:    br i1 [[TMP17]], label [[OMP_OFFLOAD_FAILED:%.*]], label [[OMP_OFFLOAD_CONT:%.*]]
710 // CHECK1:       omp_offload.failed:
711 // CHECK1-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIZN1S3fooEvEUlvE_EiRKT__l18(%class.anon.0* [[TMP1]]) #[[ATTR4]]
712 // CHECK1-NEXT:    br label [[OMP_OFFLOAD_CONT]]
713 // CHECK1:       omp_offload.cont:
714 // CHECK1-NEXT:    ret i32 0
715 //
716 //
717 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIZN1S3fooEvEUlvE_EiRKT__l18
718 // CHECK1-SAME: (%class.anon.0* nonnull align 8 dereferenceable(8) [[T:%.*]]) #[[ATTR1]] {
719 // CHECK1-NEXT:  entry:
720 // CHECK1-NEXT:    [[T_ADDR:%.*]] = alloca %class.anon.0*, align 8
721 // CHECK1-NEXT:    [[TMP:%.*]] = alloca %class.anon.0*, align 8
722 // CHECK1-NEXT:    store %class.anon.0* [[T]], %class.anon.0** [[T_ADDR]], align 8
723 // CHECK1-NEXT:    [[TMP0:%.*]] = load %class.anon.0*, %class.anon.0** [[T_ADDR]], align 8
724 // CHECK1-NEXT:    store %class.anon.0* [[TMP0]], %class.anon.0** [[TMP]], align 8
725 // CHECK1-NEXT:    [[TMP1:%.*]] = load %class.anon.0*, %class.anon.0** [[TMP]], align 8
726 // CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %class.anon.0*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), %class.anon.0* [[TMP1]])
727 // CHECK1-NEXT:    ret void
728 //
729 //
730 // CHECK1-LABEL: define {{[^@]+}}@.omp_outlined..8
731 // CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %class.anon.0* nonnull align 8 dereferenceable(8) [[T:%.*]]) #[[ATTR1]] {
732 // CHECK1-NEXT:  entry:
733 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
734 // CHECK1-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
735 // CHECK1-NEXT:    [[T_ADDR:%.*]] = alloca %class.anon.0*, align 8
736 // CHECK1-NEXT:    [[TMP:%.*]] = alloca %class.anon.0*, align 8
737 // CHECK1-NEXT:    [[T1:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
738 // CHECK1-NEXT:    [[_TMP2:%.*]] = alloca %class.anon.0*, align 8
739 // CHECK1-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
740 // CHECK1-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
741 // CHECK1-NEXT:    store %class.anon.0* [[T]], %class.anon.0** [[T_ADDR]], align 8
742 // CHECK1-NEXT:    [[TMP0:%.*]] = load %class.anon.0*, %class.anon.0** [[T_ADDR]], align 8
743 // CHECK1-NEXT:    store %class.anon.0* [[TMP0]], %class.anon.0** [[TMP]], align 8
744 // CHECK1-NEXT:    [[TMP1:%.*]] = load %class.anon.0*, %class.anon.0** [[TMP]], align 8
745 // CHECK1-NEXT:    [[TMP2:%.*]] = bitcast %class.anon.0* [[T1]] to i8*
746 // CHECK1-NEXT:    [[TMP3:%.*]] = bitcast %class.anon.0* [[TMP1]] to i8*
747 // CHECK1-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 8, i1 false)
748 // CHECK1-NEXT:    store %class.anon.0* [[T1]], %class.anon.0** [[_TMP2]], align 8
749 // CHECK1-NEXT:    [[TMP4:%.*]] = load %class.anon.0*, %class.anon.0** [[_TMP2]], align 8
750 // CHECK1-NEXT:    [[CALL:%.*]] = call signext i32 @_ZZN1S3fooEvENKUlvE_clEv(%class.anon.0* nonnull align 8 dereferenceable(8) [[TMP4]])
751 // CHECK1-NEXT:    ret void
752 //
753 //
754 // CHECK1-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg
755 // CHECK1-SAME: () #[[ATTR5:[0-9]+]] {
756 // CHECK1-NEXT:  entry:
757 // CHECK1-NEXT:    call void @__tgt_register_requires(i64 1)
758 // CHECK1-NEXT:    ret void
759 //
760 //
761 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l27_worker
762 // CHECK2-SAME: () #[[ATTR0:[0-9]+]] {
763 // CHECK2-NEXT:  entry:
764 // CHECK2-NEXT:    [[WORK_FN:%.*]] = alloca i8*, align 8
765 // CHECK2-NEXT:    [[EXEC_STATUS:%.*]] = alloca i8, align 1
766 // CHECK2-NEXT:    store i8* null, i8** [[WORK_FN]], align 8
767 // CHECK2-NEXT:    store i8 0, i8* [[EXEC_STATUS]], align 1
768 // CHECK2-NEXT:    br label [[DOTAWAIT_WORK:%.*]]
769 // CHECK2:       .await.work:
770 // CHECK2-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
771 // CHECK2-NEXT:    [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]])
772 // CHECK2-NEXT:    [[TMP1:%.*]] = zext i1 [[TMP0]] to i8
773 // CHECK2-NEXT:    store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1
774 // CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8
775 // CHECK2-NEXT:    [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null
776 // CHECK2-NEXT:    br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]]
777 // CHECK2:       .select.workers:
778 // CHECK2-NEXT:    [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1
779 // CHECK2-NEXT:    [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0
780 // CHECK2-NEXT:    br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]]
781 // CHECK2:       .execute.parallel:
782 // CHECK2-NEXT:    [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
783 // CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)*
784 // CHECK2-NEXT:    call void [[TMP5]](i16 0, i32 [[TMP4]])
785 // CHECK2-NEXT:    br label [[DOTTERMINATE_PARALLEL:%.*]]
786 // CHECK2:       .terminate.parallel:
787 // CHECK2-NEXT:    call void @__kmpc_kernel_end_parallel()
788 // CHECK2-NEXT:    br label [[DOTBARRIER_PARALLEL]]
789 // CHECK2:       .barrier.parallel:
790 // CHECK2-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
791 // CHECK2-NEXT:    br label [[DOTAWAIT_WORK]]
792 // CHECK2:       .exit:
793 // CHECK2-NEXT:    ret void
794 //
795 //
796 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l27
797 // CHECK2-SAME: (%struct.S* [[THIS:%.*]], %class.anon* nonnull align 8 dereferenceable(8) [[L:%.*]]) #[[ATTR1:[0-9]+]] {
798 // CHECK2-NEXT:  entry:
799 // CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
800 // CHECK2-NEXT:    [[L_ADDR:%.*]] = alloca %class.anon*, align 8
801 // CHECK2-NEXT:    [[TMP:%.*]] = alloca %class.anon*, align 8
802 // CHECK2-NEXT:    [[L7:%.*]] = alloca [[CLASS_ANON:%.*]], align 8
803 // CHECK2-NEXT:    [[_TMP8:%.*]] = alloca %class.anon*, align 8
804 // CHECK2-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
805 // CHECK2-NEXT:    store %class.anon* [[L]], %class.anon** [[L_ADDR]], align 8
806 // CHECK2-NEXT:    [[TMP0:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
807 // CHECK2-NEXT:    [[TMP1:%.*]] = load %class.anon*, %class.anon** [[L_ADDR]], align 8
808 // CHECK2-NEXT:    store %class.anon* [[TMP1]], %class.anon** [[TMP]], align 8
809 // CHECK2-NEXT:    [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
810 // CHECK2-NEXT:    [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
811 // CHECK2-NEXT:    [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
812 // CHECK2-NEXT:    [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]]
813 // CHECK2-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]]
814 // CHECK2-NEXT:    br i1 [[TMP2]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]]
815 // CHECK2:       .worker:
816 // CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l27_worker() #[[ATTR6:[0-9]+]]
817 // CHECK2-NEXT:    br label [[DOTEXIT:%.*]]
818 // CHECK2:       .mastercheck:
819 // CHECK2-NEXT:    [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
820 // CHECK2-NEXT:    [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
821 // CHECK2-NEXT:    [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
822 // CHECK2-NEXT:    [[TMP3:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1
823 // CHECK2-NEXT:    [[TMP4:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1
824 // CHECK2-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP3]], -1
825 // CHECK2-NEXT:    [[MASTER_TID:%.*]] = and i32 [[TMP4]], [[TMP5]]
826 // CHECK2-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]]
827 // CHECK2-NEXT:    br i1 [[TMP6]], label [[DOTMASTER:%.*]], label [[DOTEXIT]]
828 // CHECK2:       .master:
829 // CHECK2-NEXT:    [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
830 // CHECK2-NEXT:    [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
831 // CHECK2-NEXT:    [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]]
832 // CHECK2-NEXT:    call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1)
833 // CHECK2-NEXT:    call void @__kmpc_data_sharing_init_stack()
834 // CHECK2-NEXT:    [[TMP7:%.*]] = load %class.anon*, %class.anon** [[TMP]], align 8
835 // CHECK2-NEXT:    [[TMP8:%.*]] = bitcast %class.anon* [[L7]] to i8*
836 // CHECK2-NEXT:    [[TMP9:%.*]] = bitcast %class.anon* [[TMP7]] to i8*
837 // CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP8]], i8* align 8 [[TMP9]], i64 8, i1 false)
838 // CHECK2-NEXT:    store %class.anon* [[L7]], %class.anon** [[_TMP8]], align 8
839 // CHECK2-NEXT:    [[TMP10:%.*]] = load %class.anon*, %class.anon** [[_TMP8]], align 8
840 // CHECK2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP10]], i32 0, i32 0
841 // CHECK2-NEXT:    store %struct.S* [[TMP0]], %struct.S** [[TMP11]], align 8
842 // CHECK2-NEXT:    [[TMP12:%.*]] = load %class.anon*, %class.anon** [[_TMP8]], align 8
843 // CHECK2-NEXT:    [[CALL:%.*]] = call i32 @_ZZN1S3fooEvENKUlvE_clEv(%class.anon* nonnull align 8 dereferenceable(8) [[TMP12]]) #[[ATTR7:[0-9]+]]
844 // CHECK2-NEXT:    br label [[DOTTERMINATION_NOTIFIER:%.*]]
845 // CHECK2:       .termination.notifier:
846 // CHECK2-NEXT:    call void @__kmpc_kernel_deinit(i16 1)
847 // CHECK2-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
848 // CHECK2-NEXT:    br label [[DOTEXIT]]
849 // CHECK2:       .exit:
850 // CHECK2-NEXT:    ret void
851 //
852 //
853 // CHECK2-LABEL: define {{[^@]+}}@_ZZN1S3fooEvENKUlvE_clEv
854 // CHECK2-SAME: (%class.anon* nonnull align 8 dereferenceable(8) [[THIS:%.*]]) #[[ATTR4:[0-9]+]] comdat align 2 {
855 // CHECK2-NEXT:  entry:
856 // CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %class.anon*, align 8
857 // CHECK2-NEXT:    store %class.anon* [[THIS]], %class.anon** [[THIS_ADDR]], align 8
858 // CHECK2-NEXT:    [[THIS1:%.*]] = load %class.anon*, %class.anon** [[THIS_ADDR]], align 8
859 // CHECK2-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON:%.*]], %class.anon* [[THIS1]], i32 0, i32 0
860 // CHECK2-NEXT:    [[TMP1:%.*]] = load %struct.S*, %struct.S** [[TMP0]], align 8
861 // CHECK2-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[TMP1]], i32 0, i32 0
862 // CHECK2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
863 // CHECK2-NEXT:    ret i32 [[TMP2]]
864 //
865 //
866 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l29
867 // CHECK2-SAME: (%struct.S* [[THIS:%.*]], %class.anon* nonnull align 8 dereferenceable(8) [[L:%.*]]) #[[ATTR1]] {
868 // CHECK2-NEXT:  entry:
869 // CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
870 // CHECK2-NEXT:    [[L_ADDR:%.*]] = alloca %class.anon*, align 8
871 // CHECK2-NEXT:    [[TMP:%.*]] = alloca %class.anon*, align 8
872 // CHECK2-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8
873 // CHECK2-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
874 // CHECK2-NEXT:    store %class.anon* [[L]], %class.anon** [[L_ADDR]], align 8
875 // CHECK2-NEXT:    [[TMP0:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
876 // CHECK2-NEXT:    [[TMP1:%.*]] = load %class.anon*, %class.anon** [[L_ADDR]], align 8
877 // CHECK2-NEXT:    store %class.anon* [[TMP1]], %class.anon** [[TMP]], align 8
878 // CHECK2-NEXT:    [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
879 // CHECK2-NEXT:    call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1)
880 // CHECK2-NEXT:    call void @__kmpc_data_sharing_init_stack_spmd()
881 // CHECK2-NEXT:    br label [[DOTEXECUTE:%.*]]
882 // CHECK2:       .execute:
883 // CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]])
884 // CHECK2-NEXT:    [[TMP3:%.*]] = load %class.anon*, %class.anon** [[TMP]], align 8
885 // CHECK2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
886 // CHECK2-NEXT:    [[TMP5:%.*]] = bitcast %struct.S* [[TMP0]] to i8*
887 // CHECK2-NEXT:    store i8* [[TMP5]], i8** [[TMP4]], align 8
888 // CHECK2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
889 // CHECK2-NEXT:    [[TMP7:%.*]] = bitcast %class.anon* [[TMP3]] to i8*
890 // CHECK2-NEXT:    store i8* [[TMP7]], i8** [[TMP6]], align 8
891 // CHECK2-NEXT:    [[TMP8:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
892 // CHECK2-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.S*, %class.anon*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP8]], i64 2)
893 // CHECK2-NEXT:    br label [[DOTOMP_DEINIT:%.*]]
894 // CHECK2:       .omp.deinit:
895 // CHECK2-NEXT:    call void @__kmpc_spmd_kernel_deinit_v2(i16 1)
896 // CHECK2-NEXT:    br label [[DOTEXIT:%.*]]
897 // CHECK2:       .exit:
898 // CHECK2-NEXT:    ret void
899 //
900 //
901 // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__
902 // CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.S* [[THIS:%.*]], %class.anon* nonnull align 8 dereferenceable(8) [[L:%.*]]) #[[ATTR1]] {
903 // CHECK2-NEXT:  entry:
904 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
905 // CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
906 // CHECK2-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
907 // CHECK2-NEXT:    [[L_ADDR:%.*]] = alloca %class.anon*, align 8
908 // CHECK2-NEXT:    [[TMP:%.*]] = alloca %class.anon*, align 8
909 // CHECK2-NEXT:    [[L1:%.*]] = alloca [[CLASS_ANON:%.*]], align 8
910 // CHECK2-NEXT:    [[_TMP2:%.*]] = alloca %class.anon*, align 8
911 // CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
912 // CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
913 // CHECK2-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
914 // CHECK2-NEXT:    store %class.anon* [[L]], %class.anon** [[L_ADDR]], align 8
915 // CHECK2-NEXT:    [[TMP0:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
916 // CHECK2-NEXT:    [[TMP1:%.*]] = load %class.anon*, %class.anon** [[L_ADDR]], align 8
917 // CHECK2-NEXT:    store %class.anon* [[TMP1]], %class.anon** [[TMP]], align 8
918 // CHECK2-NEXT:    [[TMP2:%.*]] = load %class.anon*, %class.anon** [[TMP]], align 8
919 // CHECK2-NEXT:    [[TMP3:%.*]] = bitcast %class.anon* [[L1]] to i8*
920 // CHECK2-NEXT:    [[TMP4:%.*]] = bitcast %class.anon* [[TMP2]] to i8*
921 // CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 8, i1 false)
922 // CHECK2-NEXT:    store %class.anon* [[L1]], %class.anon** [[_TMP2]], align 8
923 // CHECK2-NEXT:    [[TMP5:%.*]] = load %class.anon*, %class.anon** [[_TMP2]], align 8
924 // CHECK2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP5]], i32 0, i32 0
925 // CHECK2-NEXT:    store %struct.S* [[TMP0]], %struct.S** [[TMP6]], align 8
926 // CHECK2-NEXT:    [[TMP7:%.*]] = load %class.anon*, %class.anon** [[_TMP2]], align 8
927 // CHECK2-NEXT:    [[CALL:%.*]] = call i32 @_ZZN1S3fooEvENKUlvE_clEv(%class.anon* nonnull align 8 dereferenceable(8) [[TMP7]]) #[[ATTR7]]
928 // CHECK2-NEXT:    ret void
929 //
930 //
931 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_worker
932 // CHECK2-SAME: () #[[ATTR0]] {
933 // CHECK2-NEXT:  entry:
934 // CHECK2-NEXT:    [[WORK_FN:%.*]] = alloca i8*, align 8
935 // CHECK2-NEXT:    [[EXEC_STATUS:%.*]] = alloca i8, align 1
936 // CHECK2-NEXT:    store i8* null, i8** [[WORK_FN]], align 8
937 // CHECK2-NEXT:    store i8 0, i8* [[EXEC_STATUS]], align 1
938 // CHECK2-NEXT:    br label [[DOTAWAIT_WORK:%.*]]
939 // CHECK2:       .await.work:
940 // CHECK2-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
941 // CHECK2-NEXT:    [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]])
942 // CHECK2-NEXT:    [[TMP1:%.*]] = zext i1 [[TMP0]] to i8
943 // CHECK2-NEXT:    store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1
944 // CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8
945 // CHECK2-NEXT:    [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null
946 // CHECK2-NEXT:    br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]]
947 // CHECK2:       .select.workers:
948 // CHECK2-NEXT:    [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1
949 // CHECK2-NEXT:    [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0
950 // CHECK2-NEXT:    br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]]
951 // CHECK2:       .execute.parallel:
952 // CHECK2-NEXT:    [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
953 // CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)*
954 // CHECK2-NEXT:    call void [[TMP5]](i16 0, i32 [[TMP4]])
955 // CHECK2-NEXT:    br label [[DOTTERMINATE_PARALLEL:%.*]]
956 // CHECK2:       .terminate.parallel:
957 // CHECK2-NEXT:    call void @__kmpc_kernel_end_parallel()
958 // CHECK2-NEXT:    br label [[DOTBARRIER_PARALLEL]]
959 // CHECK2:       .barrier.parallel:
960 // CHECK2-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
961 // CHECK2-NEXT:    br label [[DOTAWAIT_WORK]]
962 // CHECK2:       .exit:
963 // CHECK2-NEXT:    ret void
964 //
965 //
966 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41
967 // CHECK2-SAME: (i64 [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[B:%.*]], i32* nonnull align 4 dereferenceable(4) [[C:%.*]], i32* [[D:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], %class.anon.0* nonnull align 8 dereferenceable(40) [[L:%.*]]) #[[ATTR1]] {
968 // CHECK2-NEXT:  entry:
969 // CHECK2-NEXT:    [[ARGC_ADDR:%.*]] = alloca i64, align 8
970 // CHECK2-NEXT:    [[B_ADDR:%.*]] = alloca i32*, align 8
971 // CHECK2-NEXT:    [[C_ADDR:%.*]] = alloca i32*, align 8
972 // CHECK2-NEXT:    [[D_ADDR:%.*]] = alloca i32*, align 8
973 // CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32*, align 8
974 // CHECK2-NEXT:    [[L_ADDR:%.*]] = alloca %class.anon.0*, align 8
975 // CHECK2-NEXT:    [[TMP:%.*]] = alloca i32*, align 8
976 // CHECK2-NEXT:    [[_TMP1:%.*]] = alloca i32*, align 8
977 // CHECK2-NEXT:    [[_TMP2:%.*]] = alloca %class.anon.0*, align 8
978 // CHECK2-NEXT:    [[L9:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
979 // CHECK2-NEXT:    [[_TMP10:%.*]] = alloca %class.anon.0*, align 8
980 // CHECK2-NEXT:    [[B11:%.*]] = alloca i32, align 4
981 // CHECK2-NEXT:    [[_TMP12:%.*]] = alloca i32*, align 8
982 // CHECK2-NEXT:    [[C13:%.*]] = alloca i32, align 4
983 // CHECK2-NEXT:    [[_TMP14:%.*]] = alloca i32*, align 8
984 // CHECK2-NEXT:    store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8
985 // CHECK2-NEXT:    store i32* [[B]], i32** [[B_ADDR]], align 8
986 // CHECK2-NEXT:    store i32* [[C]], i32** [[C_ADDR]], align 8
987 // CHECK2-NEXT:    store i32* [[D]], i32** [[D_ADDR]], align 8
988 // CHECK2-NEXT:    store i32* [[A]], i32** [[A_ADDR]], align 8
989 // CHECK2-NEXT:    store %class.anon.0* [[L]], %class.anon.0** [[L_ADDR]], align 8
990 // CHECK2-NEXT:    [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32*
991 // CHECK2-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[B_ADDR]], align 8
992 // CHECK2-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[C_ADDR]], align 8
993 // CHECK2-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8
994 // CHECK2-NEXT:    [[TMP3:%.*]] = load %class.anon.0*, %class.anon.0** [[L_ADDR]], align 8
995 // CHECK2-NEXT:    store i32* [[TMP0]], i32** [[TMP]], align 8
996 // CHECK2-NEXT:    store i32* [[TMP1]], i32** [[_TMP1]], align 8
997 // CHECK2-NEXT:    store %class.anon.0* [[TMP3]], %class.anon.0** [[_TMP2]], align 8
998 // CHECK2-NEXT:    [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
999 // CHECK2-NEXT:    [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
1000 // CHECK2-NEXT:    [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
1001 // CHECK2-NEXT:    [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]]
1002 // CHECK2-NEXT:    [[TMP4:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]]
1003 // CHECK2-NEXT:    br i1 [[TMP4]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]]
1004 // CHECK2:       .worker:
1005 // CHECK2-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_worker() #[[ATTR6]]
1006 // CHECK2-NEXT:    br label [[DOTEXIT:%.*]]
1007 // CHECK2:       .mastercheck:
1008 // CHECK2-NEXT:    [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
1009 // CHECK2-NEXT:    [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
1010 // CHECK2-NEXT:    [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
1011 // CHECK2-NEXT:    [[TMP5:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE5]], 1
1012 // CHECK2-NEXT:    [[TMP6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], 1
1013 // CHECK2-NEXT:    [[TMP7:%.*]] = xor i32 [[TMP5]], -1
1014 // CHECK2-NEXT:    [[MASTER_TID:%.*]] = and i32 [[TMP6]], [[TMP7]]
1015 // CHECK2-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[NVPTX_TID3]], [[MASTER_TID]]
1016 // CHECK2-NEXT:    br i1 [[TMP8]], label [[DOTMASTER:%.*]], label [[DOTEXIT]]
1017 // CHECK2:       .master:
1018 // CHECK2-NEXT:    [[NVPTX_NUM_THREADS6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
1019 // CHECK2-NEXT:    [[NVPTX_WARP_SIZE7:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
1020 // CHECK2-NEXT:    [[THREAD_LIMIT8:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS6]], [[NVPTX_WARP_SIZE7]]
1021 // CHECK2-NEXT:    call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT8]], i16 1)
1022 // CHECK2-NEXT:    call void @__kmpc_data_sharing_init_stack()
1023 // CHECK2-NEXT:    [[TMP9:%.*]] = load %class.anon.0*, %class.anon.0** [[_TMP2]], align 8
1024 // CHECK2-NEXT:    [[TMP10:%.*]] = bitcast %class.anon.0* [[L9]] to i8*
1025 // CHECK2-NEXT:    [[TMP11:%.*]] = bitcast %class.anon.0* [[TMP9]] to i8*
1026 // CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 [[TMP11]], i64 40, i1 false)
1027 // CHECK2-NEXT:    store %class.anon.0* [[L9]], %class.anon.0** [[_TMP10]], align 8
1028 // CHECK2-NEXT:    [[TMP12:%.*]] = load i32*, i32** [[TMP]], align 8
1029 // CHECK2-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4
1030 // CHECK2-NEXT:    store i32 [[TMP13]], i32* [[B11]], align 4
1031 // CHECK2-NEXT:    store i32* [[B11]], i32** [[_TMP12]], align 8
1032 // CHECK2-NEXT:    [[TMP14:%.*]] = load i32*, i32** [[_TMP1]], align 8
1033 // CHECK2-NEXT:    [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4
1034 // CHECK2-NEXT:    store i32 [[TMP15]], i32* [[C13]], align 4
1035 // CHECK2-NEXT:    store i32* [[C13]], i32** [[_TMP14]], align 8
1036 // CHECK2-NEXT:    [[TMP16:%.*]] = load %class.anon.0*, %class.anon.0** [[_TMP10]], align 8
1037 // CHECK2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[TMP16]], i32 0, i32 0
1038 // CHECK2-NEXT:    store i32* [[CONV]], i32** [[TMP17]], align 8
1039 // CHECK2-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[TMP16]], i32 0, i32 1
1040 // CHECK2-NEXT:    [[TMP19:%.*]] = load i32*, i32** [[_TMP12]], align 8
1041 // CHECK2-NEXT:    store i32* [[TMP19]], i32** [[TMP18]], align 8
1042 // CHECK2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[TMP16]], i32 0, i32 2
1043 // CHECK2-NEXT:    [[TMP21:%.*]] = load i32*, i32** [[_TMP14]], align 8
1044 // CHECK2-NEXT:    store i32* [[TMP21]], i32** [[TMP20]], align 8
1045 // CHECK2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[TMP16]], i32 0, i32 3
1046 // CHECK2-NEXT:    store i32** [[D_ADDR]], i32*** [[TMP22]], align 8
1047 // CHECK2-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[TMP16]], i32 0, i32 4
1048 // CHECK2-NEXT:    store i32* [[TMP2]], i32** [[TMP23]], align 8
1049 // CHECK2-NEXT:    [[TMP24:%.*]] = load %class.anon.0*, %class.anon.0** [[_TMP10]], align 8
1050 // CHECK2-NEXT:    [[CALL:%.*]] = call i64 @"_ZZ4mainENK3$_0clEv"(%class.anon.0* nonnull align 8 dereferenceable(40) [[TMP24]]) #[[ATTR7]]
1051 // CHECK2-NEXT:    br label [[DOTTERMINATION_NOTIFIER:%.*]]
1052 // CHECK2:       .termination.notifier:
1053 // CHECK2-NEXT:    call void @__kmpc_kernel_deinit(i16 1)
1054 // CHECK2-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
1055 // CHECK2-NEXT:    br label [[DOTEXIT]]
1056 // CHECK2:       .exit:
1057 // CHECK2-NEXT:    ret void
1058 //
1059 //
1060 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l43
1061 // CHECK2-SAME: (i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[B:%.*]], i32* nonnull align 4 dereferenceable(4) [[C:%.*]], i32* [[D:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], %class.anon.0* nonnull align 8 dereferenceable(40) [[L:%.*]]) #[[ATTR1]] {
1062 // CHECK2-NEXT:  entry:
1063 // CHECK2-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32*, align 8
1064 // CHECK2-NEXT:    [[B_ADDR:%.*]] = alloca i32*, align 8
1065 // CHECK2-NEXT:    [[C_ADDR:%.*]] = alloca i32*, align 8
1066 // CHECK2-NEXT:    [[D_ADDR:%.*]] = alloca i32*, align 8
1067 // CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32*, align 8
1068 // CHECK2-NEXT:    [[L_ADDR:%.*]] = alloca %class.anon.0*, align 8
1069 // CHECK2-NEXT:    [[TMP:%.*]] = alloca i32*, align 8
1070 // CHECK2-NEXT:    [[_TMP1:%.*]] = alloca i32*, align 8
1071 // CHECK2-NEXT:    [[_TMP2:%.*]] = alloca %class.anon.0*, align 8
1072 // CHECK2-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [6 x i8*], align 8
1073 // CHECK2-NEXT:    store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8
1074 // CHECK2-NEXT:    store i32* [[B]], i32** [[B_ADDR]], align 8
1075 // CHECK2-NEXT:    store i32* [[C]], i32** [[C_ADDR]], align 8
1076 // CHECK2-NEXT:    store i32* [[D]], i32** [[D_ADDR]], align 8
1077 // CHECK2-NEXT:    store i32* [[A]], i32** [[A_ADDR]], align 8
1078 // CHECK2-NEXT:    store %class.anon.0* [[L]], %class.anon.0** [[L_ADDR]], align 8
1079 // CHECK2-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8
1080 // CHECK2-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[B_ADDR]], align 8
1081 // CHECK2-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[C_ADDR]], align 8
1082 // CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[A_ADDR]], align 8
1083 // CHECK2-NEXT:    [[TMP4:%.*]] = load %class.anon.0*, %class.anon.0** [[L_ADDR]], align 8
1084 // CHECK2-NEXT:    store i32* [[TMP1]], i32** [[TMP]], align 8
1085 // CHECK2-NEXT:    store i32* [[TMP2]], i32** [[_TMP1]], align 8
1086 // CHECK2-NEXT:    store %class.anon.0* [[TMP4]], %class.anon.0** [[_TMP2]], align 8
1087 // CHECK2-NEXT:    [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
1088 // CHECK2-NEXT:    call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1)
1089 // CHECK2-NEXT:    call void @__kmpc_data_sharing_init_stack_spmd()
1090 // CHECK2-NEXT:    br label [[DOTEXECUTE:%.*]]
1091 // CHECK2:       .execute:
1092 // CHECK2-NEXT:    [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
1093 // CHECK2-NEXT:    [[TMP6:%.*]] = load i32*, i32** [[TMP]], align 8
1094 // CHECK2-NEXT:    [[TMP7:%.*]] = load i32*, i32** [[_TMP1]], align 8
1095 // CHECK2-NEXT:    [[TMP8:%.*]] = load i32*, i32** [[D_ADDR]], align 8
1096 // CHECK2-NEXT:    [[TMP9:%.*]] = load %class.anon.0*, %class.anon.0** [[_TMP2]], align 8
1097 // CHECK2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
1098 // CHECK2-NEXT:    [[TMP11:%.*]] = bitcast i32* [[TMP0]] to i8*
1099 // CHECK2-NEXT:    store i8* [[TMP11]], i8** [[TMP10]], align 8
1100 // CHECK2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
1101 // CHECK2-NEXT:    [[TMP13:%.*]] = bitcast i32* [[TMP6]] to i8*
1102 // CHECK2-NEXT:    store i8* [[TMP13]], i8** [[TMP12]], align 8
1103 // CHECK2-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
1104 // CHECK2-NEXT:    [[TMP15:%.*]] = bitcast i32* [[TMP7]] to i8*
1105 // CHECK2-NEXT:    store i8* [[TMP15]], i8** [[TMP14]], align 8
1106 // CHECK2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
1107 // CHECK2-NEXT:    [[TMP17:%.*]] = bitcast i32* [[TMP8]] to i8*
1108 // CHECK2-NEXT:    store i8* [[TMP17]], i8** [[TMP16]], align 8
1109 // CHECK2-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4
1110 // CHECK2-NEXT:    [[TMP19:%.*]] = bitcast i32* [[TMP3]] to i8*
1111 // CHECK2-NEXT:    store i8* [[TMP19]], i8** [[TMP18]], align 8
1112 // CHECK2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 5
1113 // CHECK2-NEXT:    [[TMP21:%.*]] = bitcast %class.anon.0* [[TMP9]] to i8*
1114 // CHECK2-NEXT:    store i8* [[TMP21]], i8** [[TMP20]], align 8
1115 // CHECK2-NEXT:    [[TMP22:%.*]] = bitcast [6 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
1116 // CHECK2-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i32*, i32*, i32*, i32*, %class.anon.0*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP22]], i64 6)
1117 // CHECK2-NEXT:    br label [[DOTOMP_DEINIT:%.*]]
1118 // CHECK2:       .omp.deinit:
1119 // CHECK2-NEXT:    call void @__kmpc_spmd_kernel_deinit_v2(i16 1)
1120 // CHECK2-NEXT:    br label [[DOTEXIT:%.*]]
1121 // CHECK2:       .exit:
1122 // CHECK2-NEXT:    ret void
1123 //
1124 //
1125 // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1
1126 // CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[B:%.*]], i32* nonnull align 4 dereferenceable(4) [[C:%.*]], i32* [[D:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], %class.anon.0* nonnull align 8 dereferenceable(40) [[L:%.*]]) #[[ATTR1]] {
1127 // CHECK2-NEXT:  entry:
1128 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
1129 // CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
1130 // CHECK2-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32*, align 8
1131 // CHECK2-NEXT:    [[B_ADDR:%.*]] = alloca i32*, align 8
1132 // CHECK2-NEXT:    [[C_ADDR:%.*]] = alloca i32*, align 8
1133 // CHECK2-NEXT:    [[D_ADDR:%.*]] = alloca i32*, align 8
1134 // CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32*, align 8
1135 // CHECK2-NEXT:    [[L_ADDR:%.*]] = alloca %class.anon.0*, align 8
1136 // CHECK2-NEXT:    [[TMP:%.*]] = alloca i32*, align 8
1137 // CHECK2-NEXT:    [[_TMP1:%.*]] = alloca i32*, align 8
1138 // CHECK2-NEXT:    [[_TMP2:%.*]] = alloca %class.anon.0*, align 8
1139 // CHECK2-NEXT:    [[L3:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
1140 // CHECK2-NEXT:    [[_TMP4:%.*]] = alloca %class.anon.0*, align 8
1141 // CHECK2-NEXT:    [[ARGC5:%.*]] = alloca i32, align 4
1142 // CHECK2-NEXT:    [[B6:%.*]] = alloca i32, align 4
1143 // CHECK2-NEXT:    [[_TMP7:%.*]] = alloca i32*, align 8
1144 // CHECK2-NEXT:    [[C8:%.*]] = alloca i32, align 4
1145 // CHECK2-NEXT:    [[_TMP9:%.*]] = alloca i32*, align 8
1146 // CHECK2-NEXT:    [[A10:%.*]] = alloca i32, align 4
1147 // CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
1148 // CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
1149 // CHECK2-NEXT:    store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8
1150 // CHECK2-NEXT:    store i32* [[B]], i32** [[B_ADDR]], align 8
1151 // CHECK2-NEXT:    store i32* [[C]], i32** [[C_ADDR]], align 8
1152 // CHECK2-NEXT:    store i32* [[D]], i32** [[D_ADDR]], align 8
1153 // CHECK2-NEXT:    store i32* [[A]], i32** [[A_ADDR]], align 8
1154 // CHECK2-NEXT:    store %class.anon.0* [[L]], %class.anon.0** [[L_ADDR]], align 8
1155 // CHECK2-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8
1156 // CHECK2-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[B_ADDR]], align 8
1157 // CHECK2-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[C_ADDR]], align 8
1158 // CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[A_ADDR]], align 8
1159 // CHECK2-NEXT:    [[TMP4:%.*]] = load %class.anon.0*, %class.anon.0** [[L_ADDR]], align 8
1160 // CHECK2-NEXT:    store i32* [[TMP1]], i32** [[TMP]], align 8
1161 // CHECK2-NEXT:    store i32* [[TMP2]], i32** [[_TMP1]], align 8
1162 // CHECK2-NEXT:    store %class.anon.0* [[TMP4]], %class.anon.0** [[_TMP2]], align 8
1163 // CHECK2-NEXT:    [[TMP5:%.*]] = load %class.anon.0*, %class.anon.0** [[_TMP2]], align 8
1164 // CHECK2-NEXT:    [[TMP6:%.*]] = bitcast %class.anon.0* [[L3]] to i8*
1165 // CHECK2-NEXT:    [[TMP7:%.*]] = bitcast %class.anon.0* [[TMP5]] to i8*
1166 // CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 [[TMP7]], i64 40, i1 false)
1167 // CHECK2-NEXT:    store %class.anon.0* [[L3]], %class.anon.0** [[_TMP4]], align 8
1168 // CHECK2-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP0]], align 4
1169 // CHECK2-NEXT:    store i32 [[TMP8]], i32* [[ARGC5]], align 4
1170 // CHECK2-NEXT:    [[TMP9:%.*]] = load i32*, i32** [[TMP]], align 8
1171 // CHECK2-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4
1172 // CHECK2-NEXT:    store i32 [[TMP10]], i32* [[B6]], align 4
1173 // CHECK2-NEXT:    store i32* [[B6]], i32** [[_TMP7]], align 8
1174 // CHECK2-NEXT:    [[TMP11:%.*]] = load i32*, i32** [[_TMP1]], align 8
1175 // CHECK2-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
1176 // CHECK2-NEXT:    store i32 [[TMP12]], i32* [[C8]], align 4
1177 // CHECK2-NEXT:    store i32* [[C8]], i32** [[_TMP9]], align 8
1178 // CHECK2-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP3]], align 4
1179 // CHECK2-NEXT:    store i32 [[TMP13]], i32* [[A10]], align 4
1180 // CHECK2-NEXT:    [[TMP14:%.*]] = load %class.anon.0*, %class.anon.0** [[_TMP4]], align 8
1181 // CHECK2-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[TMP14]], i32 0, i32 0
1182 // CHECK2-NEXT:    store i32* [[ARGC5]], i32** [[TMP15]], align 8
1183 // CHECK2-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[TMP14]], i32 0, i32 1
1184 // CHECK2-NEXT:    [[TMP17:%.*]] = load i32*, i32** [[_TMP7]], align 8
1185 // CHECK2-NEXT:    store i32* [[TMP17]], i32** [[TMP16]], align 8
1186 // CHECK2-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[TMP14]], i32 0, i32 2
1187 // CHECK2-NEXT:    [[TMP19:%.*]] = load i32*, i32** [[_TMP9]], align 8
1188 // CHECK2-NEXT:    store i32* [[TMP19]], i32** [[TMP18]], align 8
1189 // CHECK2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[TMP14]], i32 0, i32 3
1190 // CHECK2-NEXT:    store i32** [[D_ADDR]], i32*** [[TMP20]], align 8
1191 // CHECK2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[TMP14]], i32 0, i32 4
1192 // CHECK2-NEXT:    store i32* [[A10]], i32** [[TMP21]], align 8
1193 // CHECK2-NEXT:    [[TMP22:%.*]] = load %class.anon.0*, %class.anon.0** [[_TMP4]], align 8
1194 // CHECK2-NEXT:    [[CALL:%.*]] = call i64 @"_ZZ4mainENK3$_0clEv"(%class.anon.0* nonnull align 8 dereferenceable(40) [[TMP22]]) #[[ATTR7]]
1195 // CHECK2-NEXT:    ret void
1196 //
1197 //
1198 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIZN1S3fooEvEUlvE_EiRKT__l18
1199 // CHECK2-SAME: (%class.anon* nonnull align 8 dereferenceable(8) [[T:%.*]]) #[[ATTR1]] {
1200 // CHECK2-NEXT:  entry:
1201 // CHECK2-NEXT:    [[T_ADDR:%.*]] = alloca %class.anon*, align 8
1202 // CHECK2-NEXT:    [[TMP:%.*]] = alloca %class.anon*, align 8
1203 // CHECK2-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
1204 // CHECK2-NEXT:    store %class.anon* [[T]], %class.anon** [[T_ADDR]], align 8
1205 // CHECK2-NEXT:    [[TMP0:%.*]] = load %class.anon*, %class.anon** [[T_ADDR]], align 8
1206 // CHECK2-NEXT:    store %class.anon* [[TMP0]], %class.anon** [[TMP]], align 8
1207 // CHECK2-NEXT:    [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
1208 // CHECK2-NEXT:    call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1)
1209 // CHECK2-NEXT:    call void @__kmpc_data_sharing_init_stack_spmd()
1210 // CHECK2-NEXT:    br label [[DOTEXECUTE:%.*]]
1211 // CHECK2:       .execute:
1212 // CHECK2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
1213 // CHECK2-NEXT:    [[TMP2:%.*]] = load %class.anon*, %class.anon** [[TMP]], align 8
1214 // CHECK2-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
1215 // CHECK2-NEXT:    [[TMP4:%.*]] = bitcast %class.anon* [[TMP2]] to i8*
1216 // CHECK2-NEXT:    store i8* [[TMP4]], i8** [[TMP3]], align 8
1217 // CHECK2-NEXT:    [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
1218 // CHECK2-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %class.anon*)* @__omp_outlined__2 to i8*), i8* null, i8** [[TMP5]], i64 1)
1219 // CHECK2-NEXT:    br label [[DOTOMP_DEINIT:%.*]]
1220 // CHECK2:       .omp.deinit:
1221 // CHECK2-NEXT:    call void @__kmpc_spmd_kernel_deinit_v2(i16 1)
1222 // CHECK2-NEXT:    br label [[DOTEXIT:%.*]]
1223 // CHECK2:       .exit:
1224 // CHECK2-NEXT:    ret void
1225 //
1226 //
1227 // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__2
1228 // CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %class.anon* nonnull align 8 dereferenceable(8) [[T:%.*]]) #[[ATTR1]] {
1229 // CHECK2-NEXT:  entry:
1230 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
1231 // CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
1232 // CHECK2-NEXT:    [[T_ADDR:%.*]] = alloca %class.anon*, align 8
1233 // CHECK2-NEXT:    [[TMP:%.*]] = alloca %class.anon*, align 8
1234 // CHECK2-NEXT:    [[T1:%.*]] = alloca [[CLASS_ANON:%.*]], align 8
1235 // CHECK2-NEXT:    [[_TMP2:%.*]] = alloca %class.anon*, align 8
1236 // CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
1237 // CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
1238 // CHECK2-NEXT:    store %class.anon* [[T]], %class.anon** [[T_ADDR]], align 8
1239 // CHECK2-NEXT:    [[TMP0:%.*]] = load %class.anon*, %class.anon** [[T_ADDR]], align 8
1240 // CHECK2-NEXT:    store %class.anon* [[TMP0]], %class.anon** [[TMP]], align 8
1241 // CHECK2-NEXT:    [[TMP1:%.*]] = load %class.anon*, %class.anon** [[TMP]], align 8
1242 // CHECK2-NEXT:    [[TMP2:%.*]] = bitcast %class.anon* [[T1]] to i8*
1243 // CHECK2-NEXT:    [[TMP3:%.*]] = bitcast %class.anon* [[TMP1]] to i8*
1244 // CHECK2-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 8, i1 false)
1245 // CHECK2-NEXT:    store %class.anon* [[T1]], %class.anon** [[_TMP2]], align 8
1246 // CHECK2-NEXT:    [[TMP4:%.*]] = load %class.anon*, %class.anon** [[_TMP2]], align 8
1247 // CHECK2-NEXT:    [[TMP5:%.*]] = load %class.anon*, %class.anon** [[_TMP2]], align 8
1248 // CHECK2-NEXT:    [[CALL:%.*]] = call i32 @_ZZN1S3fooEvENKUlvE_clEv(%class.anon* nonnull align 8 dereferenceable(8) [[TMP5]]) #[[ATTR7]]
1249 // CHECK2-NEXT:    ret void
1250 //
1251 //
1252 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_worker
1253 // CHECK3-SAME: () #[[ATTR0:[0-9]+]] {
1254 // CHECK3-NEXT:  entry:
1255 // CHECK3-NEXT:    [[WORK_FN:%.*]] = alloca i8*, align 8
1256 // CHECK3-NEXT:    [[EXEC_STATUS:%.*]] = alloca i8, align 1
1257 // CHECK3-NEXT:    store i8* null, i8** [[WORK_FN]], align 8
1258 // CHECK3-NEXT:    store i8 0, i8* [[EXEC_STATUS]], align 1
1259 // CHECK3-NEXT:    br label [[DOTAWAIT_WORK:%.*]]
1260 // CHECK3:       .await.work:
1261 // CHECK3-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
1262 // CHECK3-NEXT:    [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]])
1263 // CHECK3-NEXT:    [[TMP1:%.*]] = zext i1 [[TMP0]] to i8
1264 // CHECK3-NEXT:    store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1
1265 // CHECK3-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8
1266 // CHECK3-NEXT:    [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null
1267 // CHECK3-NEXT:    br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]]
1268 // CHECK3:       .select.workers:
1269 // CHECK3-NEXT:    [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1
1270 // CHECK3-NEXT:    [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0
1271 // CHECK3-NEXT:    br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]]
1272 // CHECK3:       .execute.parallel:
1273 // CHECK3-NEXT:    [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
1274 // CHECK3-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)*
1275 // CHECK3-NEXT:    call void [[TMP5]](i16 0, i32 [[TMP4]])
1276 // CHECK3-NEXT:    br label [[DOTTERMINATE_PARALLEL:%.*]]
1277 // CHECK3:       .terminate.parallel:
1278 // CHECK3-NEXT:    call void @__kmpc_kernel_end_parallel()
1279 // CHECK3-NEXT:    br label [[DOTBARRIER_PARALLEL]]
1280 // CHECK3:       .barrier.parallel:
1281 // CHECK3-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
1282 // CHECK3-NEXT:    br label [[DOTAWAIT_WORK]]
1283 // CHECK3:       .exit:
1284 // CHECK3-NEXT:    ret void
1285 //
1286 //
1287 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41
1288 // CHECK3-SAME: (i64 [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[B:%.*]], i32* nonnull align 4 dereferenceable(4) [[C:%.*]], i32* [[D:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], %class.anon* nonnull align 8 dereferenceable(40) [[L:%.*]]) #[[ATTR1:[0-9]+]] {
1289 // CHECK3-NEXT:  entry:
1290 // CHECK3-NEXT:    [[ARGC_ADDR:%.*]] = alloca i64, align 8
1291 // CHECK3-NEXT:    [[B_ADDR:%.*]] = alloca i32*, align 8
1292 // CHECK3-NEXT:    [[C_ADDR:%.*]] = alloca i32*, align 8
1293 // CHECK3-NEXT:    [[D_ADDR:%.*]] = alloca i32*, align 8
1294 // CHECK3-NEXT:    [[A_ADDR:%.*]] = alloca i32*, align 8
1295 // CHECK3-NEXT:    [[L_ADDR:%.*]] = alloca %class.anon*, align 8
1296 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32*, align 8
1297 // CHECK3-NEXT:    [[_TMP1:%.*]] = alloca i32*, align 8
1298 // CHECK3-NEXT:    [[_TMP2:%.*]] = alloca %class.anon*, align 8
1299 // CHECK3-NEXT:    [[L9:%.*]] = alloca [[CLASS_ANON:%.*]], align 8
1300 // CHECK3-NEXT:    [[_TMP10:%.*]] = alloca %class.anon*, align 8
1301 // CHECK3-NEXT:    [[B11:%.*]] = alloca i32, align 4
1302 // CHECK3-NEXT:    [[_TMP12:%.*]] = alloca i32*, align 8
1303 // CHECK3-NEXT:    [[C13:%.*]] = alloca i32, align 4
1304 // CHECK3-NEXT:    [[_TMP14:%.*]] = alloca i32*, align 8
1305 // CHECK3-NEXT:    store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8
1306 // CHECK3-NEXT:    store i32* [[B]], i32** [[B_ADDR]], align 8
1307 // CHECK3-NEXT:    store i32* [[C]], i32** [[C_ADDR]], align 8
1308 // CHECK3-NEXT:    store i32* [[D]], i32** [[D_ADDR]], align 8
1309 // CHECK3-NEXT:    store i32* [[A]], i32** [[A_ADDR]], align 8
1310 // CHECK3-NEXT:    store %class.anon* [[L]], %class.anon** [[L_ADDR]], align 8
1311 // CHECK3-NEXT:    [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32*
1312 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[B_ADDR]], align 8
1313 // CHECK3-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[C_ADDR]], align 8
1314 // CHECK3-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8
1315 // CHECK3-NEXT:    [[TMP3:%.*]] = load %class.anon*, %class.anon** [[L_ADDR]], align 8
1316 // CHECK3-NEXT:    store i32* [[TMP0]], i32** [[TMP]], align 8
1317 // CHECK3-NEXT:    store i32* [[TMP1]], i32** [[_TMP1]], align 8
1318 // CHECK3-NEXT:    store %class.anon* [[TMP3]], %class.anon** [[_TMP2]], align 8
1319 // CHECK3-NEXT:    [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
1320 // CHECK3-NEXT:    [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
1321 // CHECK3-NEXT:    [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
1322 // CHECK3-NEXT:    [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]]
1323 // CHECK3-NEXT:    [[TMP4:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]]
1324 // CHECK3-NEXT:    br i1 [[TMP4]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]]
1325 // CHECK3:       .worker:
1326 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_worker() #[[ATTR6:[0-9]+]]
1327 // CHECK3-NEXT:    br label [[DOTEXIT:%.*]]
1328 // CHECK3:       .mastercheck:
1329 // CHECK3-NEXT:    [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
1330 // CHECK3-NEXT:    [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
1331 // CHECK3-NEXT:    [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
1332 // CHECK3-NEXT:    [[TMP5:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE5]], 1
1333 // CHECK3-NEXT:    [[TMP6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], 1
1334 // CHECK3-NEXT:    [[TMP7:%.*]] = xor i32 [[TMP5]], -1
1335 // CHECK3-NEXT:    [[MASTER_TID:%.*]] = and i32 [[TMP6]], [[TMP7]]
1336 // CHECK3-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[NVPTX_TID3]], [[MASTER_TID]]
1337 // CHECK3-NEXT:    br i1 [[TMP8]], label [[DOTMASTER:%.*]], label [[DOTEXIT]]
1338 // CHECK3:       .master:
1339 // CHECK3-NEXT:    [[NVPTX_NUM_THREADS6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
1340 // CHECK3-NEXT:    [[NVPTX_WARP_SIZE7:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
1341 // CHECK3-NEXT:    [[THREAD_LIMIT8:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS6]], [[NVPTX_WARP_SIZE7]]
1342 // CHECK3-NEXT:    call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT8]], i16 1)
1343 // CHECK3-NEXT:    call void @__kmpc_data_sharing_init_stack()
1344 // CHECK3-NEXT:    [[TMP9:%.*]] = load %class.anon*, %class.anon** [[_TMP2]], align 8
1345 // CHECK3-NEXT:    [[TMP10:%.*]] = bitcast %class.anon* [[L9]] to i8*
1346 // CHECK3-NEXT:    [[TMP11:%.*]] = bitcast %class.anon* [[TMP9]] to i8*
1347 // CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 [[TMP11]], i64 40, i1 false)
1348 // CHECK3-NEXT:    store %class.anon* [[L9]], %class.anon** [[_TMP10]], align 8
1349 // CHECK3-NEXT:    [[TMP12:%.*]] = load i32*, i32** [[TMP]], align 8
1350 // CHECK3-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4
1351 // CHECK3-NEXT:    store i32 [[TMP13]], i32* [[B11]], align 4
1352 // CHECK3-NEXT:    store i32* [[B11]], i32** [[_TMP12]], align 8
1353 // CHECK3-NEXT:    [[TMP14:%.*]] = load i32*, i32** [[_TMP1]], align 8
1354 // CHECK3-NEXT:    [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4
1355 // CHECK3-NEXT:    store i32 [[TMP15]], i32* [[C13]], align 4
1356 // CHECK3-NEXT:    store i32* [[C13]], i32** [[_TMP14]], align 8
1357 // CHECK3-NEXT:    [[TMP16:%.*]] = load %class.anon*, %class.anon** [[_TMP10]], align 8
1358 // CHECK3-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP16]], i32 0, i32 0
1359 // CHECK3-NEXT:    store i32* [[CONV]], i32** [[TMP17]], align 8
1360 // CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP16]], i32 0, i32 1
1361 // CHECK3-NEXT:    [[TMP19:%.*]] = load i32*, i32** [[_TMP12]], align 8
1362 // CHECK3-NEXT:    store i32* [[TMP19]], i32** [[TMP18]], align 8
1363 // CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP16]], i32 0, i32 2
1364 // CHECK3-NEXT:    [[TMP21:%.*]] = load i32*, i32** [[_TMP14]], align 8
1365 // CHECK3-NEXT:    store i32* [[TMP21]], i32** [[TMP20]], align 8
1366 // CHECK3-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP16]], i32 0, i32 3
1367 // CHECK3-NEXT:    store i32** [[D_ADDR]], i32*** [[TMP22]], align 8
1368 // CHECK3-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP16]], i32 0, i32 4
1369 // CHECK3-NEXT:    store i32* [[TMP2]], i32** [[TMP23]], align 8
1370 // CHECK3-NEXT:    [[TMP24:%.*]] = load %class.anon*, %class.anon** [[_TMP10]], align 8
1371 // CHECK3-NEXT:    [[CALL:%.*]] = call i64 @"_ZZ4mainENK3$_0clEv"(%class.anon* nonnull align 8 dereferenceable(40) [[TMP24]]) #[[ATTR7:[0-9]+]]
1372 // CHECK3-NEXT:    br label [[DOTTERMINATION_NOTIFIER:%.*]]
1373 // CHECK3:       .termination.notifier:
1374 // CHECK3-NEXT:    call void @__kmpc_kernel_deinit(i16 1)
1375 // CHECK3-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
1376 // CHECK3-NEXT:    br label [[DOTEXIT]]
1377 // CHECK3:       .exit:
1378 // CHECK3-NEXT:    ret void
1379 //
1380 //
1381 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l43
1382 // CHECK3-SAME: (i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[B:%.*]], i32* nonnull align 4 dereferenceable(4) [[C:%.*]], i32* [[D:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], %class.anon* nonnull align 8 dereferenceable(40) [[L:%.*]]) #[[ATTR1]] {
1383 // CHECK3-NEXT:  entry:
1384 // CHECK3-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32*, align 8
1385 // CHECK3-NEXT:    [[B_ADDR:%.*]] = alloca i32*, align 8
1386 // CHECK3-NEXT:    [[C_ADDR:%.*]] = alloca i32*, align 8
1387 // CHECK3-NEXT:    [[D_ADDR:%.*]] = alloca i32*, align 8
1388 // CHECK3-NEXT:    [[A_ADDR:%.*]] = alloca i32*, align 8
1389 // CHECK3-NEXT:    [[L_ADDR:%.*]] = alloca %class.anon*, align 8
1390 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32*, align 8
1391 // CHECK3-NEXT:    [[_TMP1:%.*]] = alloca i32*, align 8
1392 // CHECK3-NEXT:    [[_TMP2:%.*]] = alloca %class.anon*, align 8
1393 // CHECK3-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [6 x i8*], align 8
1394 // CHECK3-NEXT:    store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8
1395 // CHECK3-NEXT:    store i32* [[B]], i32** [[B_ADDR]], align 8
1396 // CHECK3-NEXT:    store i32* [[C]], i32** [[C_ADDR]], align 8
1397 // CHECK3-NEXT:    store i32* [[D]], i32** [[D_ADDR]], align 8
1398 // CHECK3-NEXT:    store i32* [[A]], i32** [[A_ADDR]], align 8
1399 // CHECK3-NEXT:    store %class.anon* [[L]], %class.anon** [[L_ADDR]], align 8
1400 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8
1401 // CHECK3-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[B_ADDR]], align 8
1402 // CHECK3-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[C_ADDR]], align 8
1403 // CHECK3-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[A_ADDR]], align 8
1404 // CHECK3-NEXT:    [[TMP4:%.*]] = load %class.anon*, %class.anon** [[L_ADDR]], align 8
1405 // CHECK3-NEXT:    store i32* [[TMP1]], i32** [[TMP]], align 8
1406 // CHECK3-NEXT:    store i32* [[TMP2]], i32** [[_TMP1]], align 8
1407 // CHECK3-NEXT:    store %class.anon* [[TMP4]], %class.anon** [[_TMP2]], align 8
1408 // CHECK3-NEXT:    [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
1409 // CHECK3-NEXT:    call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1)
1410 // CHECK3-NEXT:    call void @__kmpc_data_sharing_init_stack_spmd()
1411 // CHECK3-NEXT:    br label [[DOTEXECUTE:%.*]]
1412 // CHECK3:       .execute:
1413 // CHECK3-NEXT:    [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]])
1414 // CHECK3-NEXT:    [[TMP6:%.*]] = load i32*, i32** [[TMP]], align 8
1415 // CHECK3-NEXT:    [[TMP7:%.*]] = load i32*, i32** [[_TMP1]], align 8
1416 // CHECK3-NEXT:    [[TMP8:%.*]] = load i32*, i32** [[D_ADDR]], align 8
1417 // CHECK3-NEXT:    [[TMP9:%.*]] = load %class.anon*, %class.anon** [[_TMP2]], align 8
1418 // CHECK3-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
1419 // CHECK3-NEXT:    [[TMP11:%.*]] = bitcast i32* [[TMP0]] to i8*
1420 // CHECK3-NEXT:    store i8* [[TMP11]], i8** [[TMP10]], align 8
1421 // CHECK3-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
1422 // CHECK3-NEXT:    [[TMP13:%.*]] = bitcast i32* [[TMP6]] to i8*
1423 // CHECK3-NEXT:    store i8* [[TMP13]], i8** [[TMP12]], align 8
1424 // CHECK3-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
1425 // CHECK3-NEXT:    [[TMP15:%.*]] = bitcast i32* [[TMP7]] to i8*
1426 // CHECK3-NEXT:    store i8* [[TMP15]], i8** [[TMP14]], align 8
1427 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
1428 // CHECK3-NEXT:    [[TMP17:%.*]] = bitcast i32* [[TMP8]] to i8*
1429 // CHECK3-NEXT:    store i8* [[TMP17]], i8** [[TMP16]], align 8
1430 // CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4
1431 // CHECK3-NEXT:    [[TMP19:%.*]] = bitcast i32* [[TMP3]] to i8*
1432 // CHECK3-NEXT:    store i8* [[TMP19]], i8** [[TMP18]], align 8
1433 // CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 5
1434 // CHECK3-NEXT:    [[TMP21:%.*]] = bitcast %class.anon* [[TMP9]] to i8*
1435 // CHECK3-NEXT:    store i8* [[TMP21]], i8** [[TMP20]], align 8
1436 // CHECK3-NEXT:    [[TMP22:%.*]] = bitcast [6 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
1437 // CHECK3-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i32*, i32*, i32*, i32*, %class.anon*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP22]], i64 6)
1438 // CHECK3-NEXT:    br label [[DOTOMP_DEINIT:%.*]]
1439 // CHECK3:       .omp.deinit:
1440 // CHECK3-NEXT:    call void @__kmpc_spmd_kernel_deinit_v2(i16 1)
1441 // CHECK3-NEXT:    br label [[DOTEXIT:%.*]]
1442 // CHECK3:       .exit:
1443 // CHECK3-NEXT:    ret void
1444 //
1445 //
1446 // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__
1447 // CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[B:%.*]], i32* nonnull align 4 dereferenceable(4) [[C:%.*]], i32* [[D:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], %class.anon* nonnull align 8 dereferenceable(40) [[L:%.*]]) #[[ATTR1]] {
1448 // CHECK3-NEXT:  entry:
1449 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
1450 // CHECK3-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
1451 // CHECK3-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32*, align 8
1452 // CHECK3-NEXT:    [[B_ADDR:%.*]] = alloca i32*, align 8
1453 // CHECK3-NEXT:    [[C_ADDR:%.*]] = alloca i32*, align 8
1454 // CHECK3-NEXT:    [[D_ADDR:%.*]] = alloca i32*, align 8
1455 // CHECK3-NEXT:    [[A_ADDR:%.*]] = alloca i32*, align 8
1456 // CHECK3-NEXT:    [[L_ADDR:%.*]] = alloca %class.anon*, align 8
1457 // CHECK3-NEXT:    [[TMP:%.*]] = alloca i32*, align 8
1458 // CHECK3-NEXT:    [[_TMP1:%.*]] = alloca i32*, align 8
1459 // CHECK3-NEXT:    [[_TMP2:%.*]] = alloca %class.anon*, align 8
1460 // CHECK3-NEXT:    [[L3:%.*]] = alloca [[CLASS_ANON:%.*]], align 8
1461 // CHECK3-NEXT:    [[_TMP4:%.*]] = alloca %class.anon*, align 8
1462 // CHECK3-NEXT:    [[ARGC5:%.*]] = alloca i32, align 4
1463 // CHECK3-NEXT:    [[B6:%.*]] = alloca i32, align 4
1464 // CHECK3-NEXT:    [[_TMP7:%.*]] = alloca i32*, align 8
1465 // CHECK3-NEXT:    [[C8:%.*]] = alloca i32, align 4
1466 // CHECK3-NEXT:    [[_TMP9:%.*]] = alloca i32*, align 8
1467 // CHECK3-NEXT:    [[A10:%.*]] = alloca i32, align 4
1468 // CHECK3-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
1469 // CHECK3-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
1470 // CHECK3-NEXT:    store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8
1471 // CHECK3-NEXT:    store i32* [[B]], i32** [[B_ADDR]], align 8
1472 // CHECK3-NEXT:    store i32* [[C]], i32** [[C_ADDR]], align 8
1473 // CHECK3-NEXT:    store i32* [[D]], i32** [[D_ADDR]], align 8
1474 // CHECK3-NEXT:    store i32* [[A]], i32** [[A_ADDR]], align 8
1475 // CHECK3-NEXT:    store %class.anon* [[L]], %class.anon** [[L_ADDR]], align 8
1476 // CHECK3-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8
1477 // CHECK3-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[B_ADDR]], align 8
1478 // CHECK3-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[C_ADDR]], align 8
1479 // CHECK3-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[A_ADDR]], align 8
1480 // CHECK3-NEXT:    [[TMP4:%.*]] = load %class.anon*, %class.anon** [[L_ADDR]], align 8
1481 // CHECK3-NEXT:    store i32* [[TMP1]], i32** [[TMP]], align 8
1482 // CHECK3-NEXT:    store i32* [[TMP2]], i32** [[_TMP1]], align 8
1483 // CHECK3-NEXT:    store %class.anon* [[TMP4]], %class.anon** [[_TMP2]], align 8
1484 // CHECK3-NEXT:    [[TMP5:%.*]] = load %class.anon*, %class.anon** [[_TMP2]], align 8
1485 // CHECK3-NEXT:    [[TMP6:%.*]] = bitcast %class.anon* [[L3]] to i8*
1486 // CHECK3-NEXT:    [[TMP7:%.*]] = bitcast %class.anon* [[TMP5]] to i8*
1487 // CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 [[TMP7]], i64 40, i1 false)
1488 // CHECK3-NEXT:    store %class.anon* [[L3]], %class.anon** [[_TMP4]], align 8
1489 // CHECK3-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP0]], align 4
1490 // CHECK3-NEXT:    store i32 [[TMP8]], i32* [[ARGC5]], align 4
1491 // CHECK3-NEXT:    [[TMP9:%.*]] = load i32*, i32** [[TMP]], align 8
1492 // CHECK3-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4
1493 // CHECK3-NEXT:    store i32 [[TMP10]], i32* [[B6]], align 4
1494 // CHECK3-NEXT:    store i32* [[B6]], i32** [[_TMP7]], align 8
1495 // CHECK3-NEXT:    [[TMP11:%.*]] = load i32*, i32** [[_TMP1]], align 8
1496 // CHECK3-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
1497 // CHECK3-NEXT:    store i32 [[TMP12]], i32* [[C8]], align 4
1498 // CHECK3-NEXT:    store i32* [[C8]], i32** [[_TMP9]], align 8
1499 // CHECK3-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP3]], align 4
1500 // CHECK3-NEXT:    store i32 [[TMP13]], i32* [[A10]], align 4
1501 // CHECK3-NEXT:    [[TMP14:%.*]] = load %class.anon*, %class.anon** [[_TMP4]], align 8
1502 // CHECK3-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP14]], i32 0, i32 0
1503 // CHECK3-NEXT:    store i32* [[ARGC5]], i32** [[TMP15]], align 8
1504 // CHECK3-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP14]], i32 0, i32 1
1505 // CHECK3-NEXT:    [[TMP17:%.*]] = load i32*, i32** [[_TMP7]], align 8
1506 // CHECK3-NEXT:    store i32* [[TMP17]], i32** [[TMP16]], align 8
1507 // CHECK3-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP14]], i32 0, i32 2
1508 // CHECK3-NEXT:    [[TMP19:%.*]] = load i32*, i32** [[_TMP9]], align 8
1509 // CHECK3-NEXT:    store i32* [[TMP19]], i32** [[TMP18]], align 8
1510 // CHECK3-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP14]], i32 0, i32 3
1511 // CHECK3-NEXT:    store i32** [[D_ADDR]], i32*** [[TMP20]], align 8
1512 // CHECK3-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP14]], i32 0, i32 4
1513 // CHECK3-NEXT:    store i32* [[A10]], i32** [[TMP21]], align 8
1514 // CHECK3-NEXT:    [[TMP22:%.*]] = load %class.anon*, %class.anon** [[_TMP4]], align 8
1515 // CHECK3-NEXT:    [[CALL:%.*]] = call i64 @"_ZZ4mainENK3$_0clEv"(%class.anon* nonnull align 8 dereferenceable(40) [[TMP22]]) #[[ATTR7]]
1516 // CHECK3-NEXT:    ret void
1517 //
1518 //
1519 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l27_worker
1520 // CHECK3-SAME: () #[[ATTR0]] {
1521 // CHECK3-NEXT:  entry:
1522 // CHECK3-NEXT:    [[WORK_FN:%.*]] = alloca i8*, align 8
1523 // CHECK3-NEXT:    [[EXEC_STATUS:%.*]] = alloca i8, align 1
1524 // CHECK3-NEXT:    store i8* null, i8** [[WORK_FN]], align 8
1525 // CHECK3-NEXT:    store i8 0, i8* [[EXEC_STATUS]], align 1
1526 // CHECK3-NEXT:    br label [[DOTAWAIT_WORK:%.*]]
1527 // CHECK3:       .await.work:
1528 // CHECK3-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
1529 // CHECK3-NEXT:    [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]])
1530 // CHECK3-NEXT:    [[TMP1:%.*]] = zext i1 [[TMP0]] to i8
1531 // CHECK3-NEXT:    store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1
1532 // CHECK3-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8
1533 // CHECK3-NEXT:    [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null
1534 // CHECK3-NEXT:    br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]]
1535 // CHECK3:       .select.workers:
1536 // CHECK3-NEXT:    [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1
1537 // CHECK3-NEXT:    [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0
1538 // CHECK3-NEXT:    br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]]
1539 // CHECK3:       .execute.parallel:
1540 // CHECK3-NEXT:    [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
1541 // CHECK3-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)*
1542 // CHECK3-NEXT:    call void [[TMP5]](i16 0, i32 [[TMP4]])
1543 // CHECK3-NEXT:    br label [[DOTTERMINATE_PARALLEL:%.*]]
1544 // CHECK3:       .terminate.parallel:
1545 // CHECK3-NEXT:    call void @__kmpc_kernel_end_parallel()
1546 // CHECK3-NEXT:    br label [[DOTBARRIER_PARALLEL]]
1547 // CHECK3:       .barrier.parallel:
1548 // CHECK3-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
1549 // CHECK3-NEXT:    br label [[DOTAWAIT_WORK]]
1550 // CHECK3:       .exit:
1551 // CHECK3-NEXT:    ret void
1552 //
1553 //
1554 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l27
1555 // CHECK3-SAME: (%struct.S* [[THIS:%.*]], %class.anon.0* nonnull align 8 dereferenceable(8) [[L:%.*]]) #[[ATTR1]] {
1556 // CHECK3-NEXT:  entry:
1557 // CHECK3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
1558 // CHECK3-NEXT:    [[L_ADDR:%.*]] = alloca %class.anon.0*, align 8
1559 // CHECK3-NEXT:    [[TMP:%.*]] = alloca %class.anon.0*, align 8
1560 // CHECK3-NEXT:    [[L7:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
1561 // CHECK3-NEXT:    [[_TMP8:%.*]] = alloca %class.anon.0*, align 8
1562 // CHECK3-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
1563 // CHECK3-NEXT:    store %class.anon.0* [[L]], %class.anon.0** [[L_ADDR]], align 8
1564 // CHECK3-NEXT:    [[TMP0:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
1565 // CHECK3-NEXT:    [[TMP1:%.*]] = load %class.anon.0*, %class.anon.0** [[L_ADDR]], align 8
1566 // CHECK3-NEXT:    store %class.anon.0* [[TMP1]], %class.anon.0** [[TMP]], align 8
1567 // CHECK3-NEXT:    [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
1568 // CHECK3-NEXT:    [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
1569 // CHECK3-NEXT:    [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
1570 // CHECK3-NEXT:    [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]]
1571 // CHECK3-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]]
1572 // CHECK3-NEXT:    br i1 [[TMP2]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]]
1573 // CHECK3:       .worker:
1574 // CHECK3-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l27_worker() #[[ATTR6]]
1575 // CHECK3-NEXT:    br label [[DOTEXIT:%.*]]
1576 // CHECK3:       .mastercheck:
1577 // CHECK3-NEXT:    [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
1578 // CHECK3-NEXT:    [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
1579 // CHECK3-NEXT:    [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
1580 // CHECK3-NEXT:    [[TMP3:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1
1581 // CHECK3-NEXT:    [[TMP4:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1
1582 // CHECK3-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP3]], -1
1583 // CHECK3-NEXT:    [[MASTER_TID:%.*]] = and i32 [[TMP4]], [[TMP5]]
1584 // CHECK3-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]]
1585 // CHECK3-NEXT:    br i1 [[TMP6]], label [[DOTMASTER:%.*]], label [[DOTEXIT]]
1586 // CHECK3:       .master:
1587 // CHECK3-NEXT:    [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
1588 // CHECK3-NEXT:    [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
1589 // CHECK3-NEXT:    [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]]
1590 // CHECK3-NEXT:    call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1)
1591 // CHECK3-NEXT:    call void @__kmpc_data_sharing_init_stack()
1592 // CHECK3-NEXT:    [[TMP7:%.*]] = load %class.anon.0*, %class.anon.0** [[TMP]], align 8
1593 // CHECK3-NEXT:    [[TMP8:%.*]] = bitcast %class.anon.0* [[L7]] to i8*
1594 // CHECK3-NEXT:    [[TMP9:%.*]] = bitcast %class.anon.0* [[TMP7]] to i8*
1595 // CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP8]], i8* align 8 [[TMP9]], i64 8, i1 false)
1596 // CHECK3-NEXT:    store %class.anon.0* [[L7]], %class.anon.0** [[_TMP8]], align 8
1597 // CHECK3-NEXT:    [[TMP10:%.*]] = load %class.anon.0*, %class.anon.0** [[_TMP8]], align 8
1598 // CHECK3-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[TMP10]], i32 0, i32 0
1599 // CHECK3-NEXT:    store %struct.S* [[TMP0]], %struct.S** [[TMP11]], align 8
1600 // CHECK3-NEXT:    [[TMP12:%.*]] = load %class.anon.0*, %class.anon.0** [[_TMP8]], align 8
1601 // CHECK3-NEXT:    [[CALL:%.*]] = call i32 @_ZZN1S3fooEvENKUlvE_clEv(%class.anon.0* nonnull align 8 dereferenceable(8) [[TMP12]]) #[[ATTR7]]
1602 // CHECK3-NEXT:    br label [[DOTTERMINATION_NOTIFIER:%.*]]
1603 // CHECK3:       .termination.notifier:
1604 // CHECK3-NEXT:    call void @__kmpc_kernel_deinit(i16 1)
1605 // CHECK3-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
1606 // CHECK3-NEXT:    br label [[DOTEXIT]]
1607 // CHECK3:       .exit:
1608 // CHECK3-NEXT:    ret void
1609 //
1610 //
1611 // CHECK3-LABEL: define {{[^@]+}}@_ZZN1S3fooEvENKUlvE_clEv
1612 // CHECK3-SAME: (%class.anon.0* nonnull align 8 dereferenceable(8) [[THIS:%.*]]) #[[ATTR4:[0-9]+]] comdat align 2 {
1613 // CHECK3-NEXT:  entry:
1614 // CHECK3-NEXT:    [[THIS_ADDR:%.*]] = alloca %class.anon.0*, align 8
1615 // CHECK3-NEXT:    store %class.anon.0* [[THIS]], %class.anon.0** [[THIS_ADDR]], align 8
1616 // CHECK3-NEXT:    [[THIS1:%.*]] = load %class.anon.0*, %class.anon.0** [[THIS_ADDR]], align 8
1617 // CHECK3-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], %class.anon.0* [[THIS1]], i32 0, i32 0
1618 // CHECK3-NEXT:    [[TMP1:%.*]] = load %struct.S*, %struct.S** [[TMP0]], align 8
1619 // CHECK3-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[TMP1]], i32 0, i32 0
1620 // CHECK3-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
1621 // CHECK3-NEXT:    ret i32 [[TMP2]]
1622 //
1623 //
1624 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l29
1625 // CHECK3-SAME: (%struct.S* [[THIS:%.*]], %class.anon.0* nonnull align 8 dereferenceable(8) [[L:%.*]]) #[[ATTR1]] {
1626 // CHECK3-NEXT:  entry:
1627 // CHECK3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
1628 // CHECK3-NEXT:    [[L_ADDR:%.*]] = alloca %class.anon.0*, align 8
1629 // CHECK3-NEXT:    [[TMP:%.*]] = alloca %class.anon.0*, align 8
1630 // CHECK3-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8
1631 // CHECK3-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
1632 // CHECK3-NEXT:    store %class.anon.0* [[L]], %class.anon.0** [[L_ADDR]], align 8
1633 // CHECK3-NEXT:    [[TMP0:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
1634 // CHECK3-NEXT:    [[TMP1:%.*]] = load %class.anon.0*, %class.anon.0** [[L_ADDR]], align 8
1635 // CHECK3-NEXT:    store %class.anon.0* [[TMP1]], %class.anon.0** [[TMP]], align 8
1636 // CHECK3-NEXT:    [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
1637 // CHECK3-NEXT:    call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1)
1638 // CHECK3-NEXT:    call void @__kmpc_data_sharing_init_stack_spmd()
1639 // CHECK3-NEXT:    br label [[DOTEXECUTE:%.*]]
1640 // CHECK3:       .execute:
1641 // CHECK3-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
1642 // CHECK3-NEXT:    [[TMP3:%.*]] = load %class.anon.0*, %class.anon.0** [[TMP]], align 8
1643 // CHECK3-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
1644 // CHECK3-NEXT:    [[TMP5:%.*]] = bitcast %struct.S* [[TMP0]] to i8*
1645 // CHECK3-NEXT:    store i8* [[TMP5]], i8** [[TMP4]], align 8
1646 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
1647 // CHECK3-NEXT:    [[TMP7:%.*]] = bitcast %class.anon.0* [[TMP3]] to i8*
1648 // CHECK3-NEXT:    store i8* [[TMP7]], i8** [[TMP6]], align 8
1649 // CHECK3-NEXT:    [[TMP8:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
1650 // CHECK3-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.S*, %class.anon.0*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP8]], i64 2)
1651 // CHECK3-NEXT:    br label [[DOTOMP_DEINIT:%.*]]
1652 // CHECK3:       .omp.deinit:
1653 // CHECK3-NEXT:    call void @__kmpc_spmd_kernel_deinit_v2(i16 1)
1654 // CHECK3-NEXT:    br label [[DOTEXIT:%.*]]
1655 // CHECK3:       .exit:
1656 // CHECK3-NEXT:    ret void
1657 //
1658 //
1659 // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1
1660 // CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.S* [[THIS:%.*]], %class.anon.0* nonnull align 8 dereferenceable(8) [[L:%.*]]) #[[ATTR1]] {
1661 // CHECK3-NEXT:  entry:
1662 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
1663 // CHECK3-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
1664 // CHECK3-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
1665 // CHECK3-NEXT:    [[L_ADDR:%.*]] = alloca %class.anon.0*, align 8
1666 // CHECK3-NEXT:    [[TMP:%.*]] = alloca %class.anon.0*, align 8
1667 // CHECK3-NEXT:    [[L1:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
1668 // CHECK3-NEXT:    [[_TMP2:%.*]] = alloca %class.anon.0*, align 8
1669 // CHECK3-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
1670 // CHECK3-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
1671 // CHECK3-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
1672 // CHECK3-NEXT:    store %class.anon.0* [[L]], %class.anon.0** [[L_ADDR]], align 8
1673 // CHECK3-NEXT:    [[TMP0:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
1674 // CHECK3-NEXT:    [[TMP1:%.*]] = load %class.anon.0*, %class.anon.0** [[L_ADDR]], align 8
1675 // CHECK3-NEXT:    store %class.anon.0* [[TMP1]], %class.anon.0** [[TMP]], align 8
1676 // CHECK3-NEXT:    [[TMP2:%.*]] = load %class.anon.0*, %class.anon.0** [[TMP]], align 8
1677 // CHECK3-NEXT:    [[TMP3:%.*]] = bitcast %class.anon.0* [[L1]] to i8*
1678 // CHECK3-NEXT:    [[TMP4:%.*]] = bitcast %class.anon.0* [[TMP2]] to i8*
1679 // CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 8, i1 false)
1680 // CHECK3-NEXT:    store %class.anon.0* [[L1]], %class.anon.0** [[_TMP2]], align 8
1681 // CHECK3-NEXT:    [[TMP5:%.*]] = load %class.anon.0*, %class.anon.0** [[_TMP2]], align 8
1682 // CHECK3-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[TMP5]], i32 0, i32 0
1683 // CHECK3-NEXT:    store %struct.S* [[TMP0]], %struct.S** [[TMP6]], align 8
1684 // CHECK3-NEXT:    [[TMP7:%.*]] = load %class.anon.0*, %class.anon.0** [[_TMP2]], align 8
1685 // CHECK3-NEXT:    [[CALL:%.*]] = call i32 @_ZZN1S3fooEvENKUlvE_clEv(%class.anon.0* nonnull align 8 dereferenceable(8) [[TMP7]]) #[[ATTR7]]
1686 // CHECK3-NEXT:    ret void
1687 //
1688 //
1689 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIZN1S3fooEvEUlvE_EiRKT__l18
1690 // CHECK3-SAME: (%class.anon.0* nonnull align 8 dereferenceable(8) [[T:%.*]]) #[[ATTR1]] {
1691 // CHECK3-NEXT:  entry:
1692 // CHECK3-NEXT:    [[T_ADDR:%.*]] = alloca %class.anon.0*, align 8
1693 // CHECK3-NEXT:    [[TMP:%.*]] = alloca %class.anon.0*, align 8
1694 // CHECK3-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
1695 // CHECK3-NEXT:    store %class.anon.0* [[T]], %class.anon.0** [[T_ADDR]], align 8
1696 // CHECK3-NEXT:    [[TMP0:%.*]] = load %class.anon.0*, %class.anon.0** [[T_ADDR]], align 8
1697 // CHECK3-NEXT:    store %class.anon.0* [[TMP0]], %class.anon.0** [[TMP]], align 8
1698 // CHECK3-NEXT:    [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
1699 // CHECK3-NEXT:    call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1)
1700 // CHECK3-NEXT:    call void @__kmpc_data_sharing_init_stack_spmd()
1701 // CHECK3-NEXT:    br label [[DOTEXECUTE:%.*]]
1702 // CHECK3:       .execute:
1703 // CHECK3-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
1704 // CHECK3-NEXT:    [[TMP2:%.*]] = load %class.anon.0*, %class.anon.0** [[TMP]], align 8
1705 // CHECK3-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
1706 // CHECK3-NEXT:    [[TMP4:%.*]] = bitcast %class.anon.0* [[TMP2]] to i8*
1707 // CHECK3-NEXT:    store i8* [[TMP4]], i8** [[TMP3]], align 8
1708 // CHECK3-NEXT:    [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
1709 // CHECK3-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %class.anon.0*)* @__omp_outlined__2 to i8*), i8* null, i8** [[TMP5]], i64 1)
1710 // CHECK3-NEXT:    br label [[DOTOMP_DEINIT:%.*]]
1711 // CHECK3:       .omp.deinit:
1712 // CHECK3-NEXT:    call void @__kmpc_spmd_kernel_deinit_v2(i16 1)
1713 // CHECK3-NEXT:    br label [[DOTEXIT:%.*]]
1714 // CHECK3:       .exit:
1715 // CHECK3-NEXT:    ret void
1716 //
1717 //
1718 // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__2
1719 // CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %class.anon.0* nonnull align 8 dereferenceable(8) [[T:%.*]]) #[[ATTR1]] {
1720 // CHECK3-NEXT:  entry:
1721 // CHECK3-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
1722 // CHECK3-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
1723 // CHECK3-NEXT:    [[T_ADDR:%.*]] = alloca %class.anon.0*, align 8
1724 // CHECK3-NEXT:    [[TMP:%.*]] = alloca %class.anon.0*, align 8
1725 // CHECK3-NEXT:    [[T1:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
1726 // CHECK3-NEXT:    [[_TMP2:%.*]] = alloca %class.anon.0*, align 8
1727 // CHECK3-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
1728 // CHECK3-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
1729 // CHECK3-NEXT:    store %class.anon.0* [[T]], %class.anon.0** [[T_ADDR]], align 8
1730 // CHECK3-NEXT:    [[TMP0:%.*]] = load %class.anon.0*, %class.anon.0** [[T_ADDR]], align 8
1731 // CHECK3-NEXT:    store %class.anon.0* [[TMP0]], %class.anon.0** [[TMP]], align 8
1732 // CHECK3-NEXT:    [[TMP1:%.*]] = load %class.anon.0*, %class.anon.0** [[TMP]], align 8
1733 // CHECK3-NEXT:    [[TMP2:%.*]] = bitcast %class.anon.0* [[T1]] to i8*
1734 // CHECK3-NEXT:    [[TMP3:%.*]] = bitcast %class.anon.0* [[TMP1]] to i8*
1735 // CHECK3-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 8, i1 false)
1736 // CHECK3-NEXT:    store %class.anon.0* [[T1]], %class.anon.0** [[_TMP2]], align 8
1737 // CHECK3-NEXT:    [[TMP4:%.*]] = load %class.anon.0*, %class.anon.0** [[_TMP2]], align 8
1738 // CHECK3-NEXT:    [[TMP5:%.*]] = load %class.anon.0*, %class.anon.0** [[_TMP2]], align 8
1739 // CHECK3-NEXT:    [[CALL:%.*]] = call i32 @_ZZN1S3fooEvENKUlvE_clEv(%class.anon.0* nonnull align 8 dereferenceable(8) [[TMP5]]) #[[ATTR7]]
1740 // CHECK3-NEXT:    ret void
1741 //
1742 //
1743 // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_worker
1744 // CHECK4-SAME: () #[[ATTR0:[0-9]+]] {
1745 // CHECK4-NEXT:  entry:
1746 // CHECK4-NEXT:    [[WORK_FN:%.*]] = alloca i8*, align 8
1747 // CHECK4-NEXT:    [[EXEC_STATUS:%.*]] = alloca i8, align 1
1748 // CHECK4-NEXT:    store i8* null, i8** [[WORK_FN]], align 8
1749 // CHECK4-NEXT:    store i8 0, i8* [[EXEC_STATUS]], align 1
1750 // CHECK4-NEXT:    br label [[DOTAWAIT_WORK:%.*]]
1751 // CHECK4:       .await.work:
1752 // CHECK4-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
1753 // CHECK4-NEXT:    [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]])
1754 // CHECK4-NEXT:    [[TMP1:%.*]] = zext i1 [[TMP0]] to i8
1755 // CHECK4-NEXT:    store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1
1756 // CHECK4-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8
1757 // CHECK4-NEXT:    [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null
1758 // CHECK4-NEXT:    br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]]
1759 // CHECK4:       .select.workers:
1760 // CHECK4-NEXT:    [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1
1761 // CHECK4-NEXT:    [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0
1762 // CHECK4-NEXT:    br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]]
1763 // CHECK4:       .execute.parallel:
1764 // CHECK4-NEXT:    [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
1765 // CHECK4-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)*
1766 // CHECK4-NEXT:    call void [[TMP5]](i16 0, i32 [[TMP4]])
1767 // CHECK4-NEXT:    br label [[DOTTERMINATE_PARALLEL:%.*]]
1768 // CHECK4:       .terminate.parallel:
1769 // CHECK4-NEXT:    call void @__kmpc_kernel_end_parallel()
1770 // CHECK4-NEXT:    br label [[DOTBARRIER_PARALLEL]]
1771 // CHECK4:       .barrier.parallel:
1772 // CHECK4-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
1773 // CHECK4-NEXT:    br label [[DOTAWAIT_WORK]]
1774 // CHECK4:       .exit:
1775 // CHECK4-NEXT:    ret void
1776 //
1777 //
1778 // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41
1779 // CHECK4-SAME: (i64 [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[B:%.*]], i32* nonnull align 4 dereferenceable(4) [[C:%.*]], i32* [[D:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], %class.anon* nonnull align 8 dereferenceable(40) [[L:%.*]]) #[[ATTR1:[0-9]+]] {
1780 // CHECK4-NEXT:  entry:
1781 // CHECK4-NEXT:    [[ARGC_ADDR:%.*]] = alloca i64, align 8
1782 // CHECK4-NEXT:    [[B_ADDR:%.*]] = alloca i32*, align 8
1783 // CHECK4-NEXT:    [[C_ADDR:%.*]] = alloca i32*, align 8
1784 // CHECK4-NEXT:    [[D_ADDR:%.*]] = alloca i32*, align 8
1785 // CHECK4-NEXT:    [[A_ADDR:%.*]] = alloca i32*, align 8
1786 // CHECK4-NEXT:    [[L_ADDR:%.*]] = alloca %class.anon*, align 8
1787 // CHECK4-NEXT:    [[TMP:%.*]] = alloca i32*, align 8
1788 // CHECK4-NEXT:    [[_TMP1:%.*]] = alloca i32*, align 8
1789 // CHECK4-NEXT:    [[_TMP2:%.*]] = alloca %class.anon*, align 8
1790 // CHECK4-NEXT:    [[L9:%.*]] = alloca [[CLASS_ANON:%.*]], align 8
1791 // CHECK4-NEXT:    [[_TMP10:%.*]] = alloca %class.anon*, align 8
1792 // CHECK4-NEXT:    [[B11:%.*]] = alloca i32, align 4
1793 // CHECK4-NEXT:    [[_TMP12:%.*]] = alloca i32*, align 8
1794 // CHECK4-NEXT:    [[C13:%.*]] = alloca i32, align 4
1795 // CHECK4-NEXT:    [[_TMP14:%.*]] = alloca i32*, align 8
1796 // CHECK4-NEXT:    store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8
1797 // CHECK4-NEXT:    store i32* [[B]], i32** [[B_ADDR]], align 8
1798 // CHECK4-NEXT:    store i32* [[C]], i32** [[C_ADDR]], align 8
1799 // CHECK4-NEXT:    store i32* [[D]], i32** [[D_ADDR]], align 8
1800 // CHECK4-NEXT:    store i32* [[A]], i32** [[A_ADDR]], align 8
1801 // CHECK4-NEXT:    store %class.anon* [[L]], %class.anon** [[L_ADDR]], align 8
1802 // CHECK4-NEXT:    [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32*
1803 // CHECK4-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[B_ADDR]], align 8
1804 // CHECK4-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[C_ADDR]], align 8
1805 // CHECK4-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8
1806 // CHECK4-NEXT:    [[TMP3:%.*]] = load %class.anon*, %class.anon** [[L_ADDR]], align 8
1807 // CHECK4-NEXT:    store i32* [[TMP0]], i32** [[TMP]], align 8
1808 // CHECK4-NEXT:    store i32* [[TMP1]], i32** [[_TMP1]], align 8
1809 // CHECK4-NEXT:    store %class.anon* [[TMP3]], %class.anon** [[_TMP2]], align 8
1810 // CHECK4-NEXT:    [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
1811 // CHECK4-NEXT:    [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
1812 // CHECK4-NEXT:    [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
1813 // CHECK4-NEXT:    [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]]
1814 // CHECK4-NEXT:    [[TMP4:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]]
1815 // CHECK4-NEXT:    br i1 [[TMP4]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]]
1816 // CHECK4:       .worker:
1817 // CHECK4-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_worker() #[[ATTR6:[0-9]+]]
1818 // CHECK4-NEXT:    br label [[DOTEXIT:%.*]]
1819 // CHECK4:       .mastercheck:
1820 // CHECK4-NEXT:    [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
1821 // CHECK4-NEXT:    [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
1822 // CHECK4-NEXT:    [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
1823 // CHECK4-NEXT:    [[TMP5:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE5]], 1
1824 // CHECK4-NEXT:    [[TMP6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], 1
1825 // CHECK4-NEXT:    [[TMP7:%.*]] = xor i32 [[TMP5]], -1
1826 // CHECK4-NEXT:    [[MASTER_TID:%.*]] = and i32 [[TMP6]], [[TMP7]]
1827 // CHECK4-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[NVPTX_TID3]], [[MASTER_TID]]
1828 // CHECK4-NEXT:    br i1 [[TMP8]], label [[DOTMASTER:%.*]], label [[DOTEXIT]]
1829 // CHECK4:       .master:
1830 // CHECK4-NEXT:    [[NVPTX_NUM_THREADS6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
1831 // CHECK4-NEXT:    [[NVPTX_WARP_SIZE7:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
1832 // CHECK4-NEXT:    [[THREAD_LIMIT8:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS6]], [[NVPTX_WARP_SIZE7]]
1833 // CHECK4-NEXT:    call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT8]], i16 1)
1834 // CHECK4-NEXT:    call void @__kmpc_data_sharing_init_stack()
1835 // CHECK4-NEXT:    [[TMP9:%.*]] = load %class.anon*, %class.anon** [[_TMP2]], align 8
1836 // CHECK4-NEXT:    [[TMP10:%.*]] = bitcast %class.anon* [[L9]] to i8*
1837 // CHECK4-NEXT:    [[TMP11:%.*]] = bitcast %class.anon* [[TMP9]] to i8*
1838 // CHECK4-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP10]], i8* align 8 [[TMP11]], i64 40, i1 false)
1839 // CHECK4-NEXT:    store %class.anon* [[L9]], %class.anon** [[_TMP10]], align 8
1840 // CHECK4-NEXT:    [[TMP12:%.*]] = load i32*, i32** [[TMP]], align 8
1841 // CHECK4-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4
1842 // CHECK4-NEXT:    store i32 [[TMP13]], i32* [[B11]], align 4
1843 // CHECK4-NEXT:    store i32* [[B11]], i32** [[_TMP12]], align 8
1844 // CHECK4-NEXT:    [[TMP14:%.*]] = load i32*, i32** [[_TMP1]], align 8
1845 // CHECK4-NEXT:    [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4
1846 // CHECK4-NEXT:    store i32 [[TMP15]], i32* [[C13]], align 4
1847 // CHECK4-NEXT:    store i32* [[C13]], i32** [[_TMP14]], align 8
1848 // CHECK4-NEXT:    [[TMP16:%.*]] = load %class.anon*, %class.anon** [[_TMP10]], align 8
1849 // CHECK4-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP16]], i32 0, i32 0
1850 // CHECK4-NEXT:    store i32* [[CONV]], i32** [[TMP17]], align 8
1851 // CHECK4-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP16]], i32 0, i32 1
1852 // CHECK4-NEXT:    [[TMP19:%.*]] = load i32*, i32** [[_TMP12]], align 8
1853 // CHECK4-NEXT:    store i32* [[TMP19]], i32** [[TMP18]], align 8
1854 // CHECK4-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP16]], i32 0, i32 2
1855 // CHECK4-NEXT:    [[TMP21:%.*]] = load i32*, i32** [[_TMP14]], align 8
1856 // CHECK4-NEXT:    store i32* [[TMP21]], i32** [[TMP20]], align 8
1857 // CHECK4-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP16]], i32 0, i32 3
1858 // CHECK4-NEXT:    store i32** [[D_ADDR]], i32*** [[TMP22]], align 8
1859 // CHECK4-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP16]], i32 0, i32 4
1860 // CHECK4-NEXT:    store i32* [[TMP2]], i32** [[TMP23]], align 8
1861 // CHECK4-NEXT:    [[TMP24:%.*]] = load %class.anon*, %class.anon** [[_TMP10]], align 8
1862 // CHECK4-NEXT:    [[CALL:%.*]] = call i64 @"_ZZ4mainENK3$_0clEv"(%class.anon* nonnull align 8 dereferenceable(40) [[TMP24]]) #[[ATTR7:[0-9]+]]
1863 // CHECK4-NEXT:    br label [[DOTTERMINATION_NOTIFIER:%.*]]
1864 // CHECK4:       .termination.notifier:
1865 // CHECK4-NEXT:    call void @__kmpc_kernel_deinit(i16 1)
1866 // CHECK4-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
1867 // CHECK4-NEXT:    br label [[DOTEXIT]]
1868 // CHECK4:       .exit:
1869 // CHECK4-NEXT:    ret void
1870 //
1871 //
1872 // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l43
1873 // CHECK4-SAME: (i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[B:%.*]], i32* nonnull align 4 dereferenceable(4) [[C:%.*]], i32* [[D:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], %class.anon* nonnull align 8 dereferenceable(40) [[L:%.*]]) #[[ATTR1]] {
1874 // CHECK4-NEXT:  entry:
1875 // CHECK4-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32*, align 8
1876 // CHECK4-NEXT:    [[B_ADDR:%.*]] = alloca i32*, align 8
1877 // CHECK4-NEXT:    [[C_ADDR:%.*]] = alloca i32*, align 8
1878 // CHECK4-NEXT:    [[D_ADDR:%.*]] = alloca i32*, align 8
1879 // CHECK4-NEXT:    [[A_ADDR:%.*]] = alloca i32*, align 8
1880 // CHECK4-NEXT:    [[L_ADDR:%.*]] = alloca %class.anon*, align 8
1881 // CHECK4-NEXT:    [[TMP:%.*]] = alloca i32*, align 8
1882 // CHECK4-NEXT:    [[_TMP1:%.*]] = alloca i32*, align 8
1883 // CHECK4-NEXT:    [[_TMP2:%.*]] = alloca %class.anon*, align 8
1884 // CHECK4-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [6 x i8*], align 8
1885 // CHECK4-NEXT:    store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8
1886 // CHECK4-NEXT:    store i32* [[B]], i32** [[B_ADDR]], align 8
1887 // CHECK4-NEXT:    store i32* [[C]], i32** [[C_ADDR]], align 8
1888 // CHECK4-NEXT:    store i32* [[D]], i32** [[D_ADDR]], align 8
1889 // CHECK4-NEXT:    store i32* [[A]], i32** [[A_ADDR]], align 8
1890 // CHECK4-NEXT:    store %class.anon* [[L]], %class.anon** [[L_ADDR]], align 8
1891 // CHECK4-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8
1892 // CHECK4-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[B_ADDR]], align 8
1893 // CHECK4-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[C_ADDR]], align 8
1894 // CHECK4-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[A_ADDR]], align 8
1895 // CHECK4-NEXT:    [[TMP4:%.*]] = load %class.anon*, %class.anon** [[L_ADDR]], align 8
1896 // CHECK4-NEXT:    store i32* [[TMP1]], i32** [[TMP]], align 8
1897 // CHECK4-NEXT:    store i32* [[TMP2]], i32** [[_TMP1]], align 8
1898 // CHECK4-NEXT:    store %class.anon* [[TMP4]], %class.anon** [[_TMP2]], align 8
1899 // CHECK4-NEXT:    [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
1900 // CHECK4-NEXT:    call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1)
1901 // CHECK4-NEXT:    call void @__kmpc_data_sharing_init_stack_spmd()
1902 // CHECK4-NEXT:    br label [[DOTEXECUTE:%.*]]
1903 // CHECK4:       .execute:
1904 // CHECK4-NEXT:    [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]])
1905 // CHECK4-NEXT:    [[TMP6:%.*]] = load i32*, i32** [[TMP]], align 8
1906 // CHECK4-NEXT:    [[TMP7:%.*]] = load i32*, i32** [[_TMP1]], align 8
1907 // CHECK4-NEXT:    [[TMP8:%.*]] = load i32*, i32** [[D_ADDR]], align 8
1908 // CHECK4-NEXT:    [[TMP9:%.*]] = load %class.anon*, %class.anon** [[_TMP2]], align 8
1909 // CHECK4-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
1910 // CHECK4-NEXT:    [[TMP11:%.*]] = bitcast i32* [[TMP0]] to i8*
1911 // CHECK4-NEXT:    store i8* [[TMP11]], i8** [[TMP10]], align 8
1912 // CHECK4-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
1913 // CHECK4-NEXT:    [[TMP13:%.*]] = bitcast i32* [[TMP6]] to i8*
1914 // CHECK4-NEXT:    store i8* [[TMP13]], i8** [[TMP12]], align 8
1915 // CHECK4-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2
1916 // CHECK4-NEXT:    [[TMP15:%.*]] = bitcast i32* [[TMP7]] to i8*
1917 // CHECK4-NEXT:    store i8* [[TMP15]], i8** [[TMP14]], align 8
1918 // CHECK4-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3
1919 // CHECK4-NEXT:    [[TMP17:%.*]] = bitcast i32* [[TMP8]] to i8*
1920 // CHECK4-NEXT:    store i8* [[TMP17]], i8** [[TMP16]], align 8
1921 // CHECK4-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4
1922 // CHECK4-NEXT:    [[TMP19:%.*]] = bitcast i32* [[TMP3]] to i8*
1923 // CHECK4-NEXT:    store i8* [[TMP19]], i8** [[TMP18]], align 8
1924 // CHECK4-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 5
1925 // CHECK4-NEXT:    [[TMP21:%.*]] = bitcast %class.anon* [[TMP9]] to i8*
1926 // CHECK4-NEXT:    store i8* [[TMP21]], i8** [[TMP20]], align 8
1927 // CHECK4-NEXT:    [[TMP22:%.*]] = bitcast [6 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
1928 // CHECK4-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i32*, i32*, i32*, i32*, %class.anon*)* @__omp_outlined__ to i8*), i8* null, i8** [[TMP22]], i64 6)
1929 // CHECK4-NEXT:    br label [[DOTOMP_DEINIT:%.*]]
1930 // CHECK4:       .omp.deinit:
1931 // CHECK4-NEXT:    call void @__kmpc_spmd_kernel_deinit_v2(i16 1)
1932 // CHECK4-NEXT:    br label [[DOTEXIT:%.*]]
1933 // CHECK4:       .exit:
1934 // CHECK4-NEXT:    ret void
1935 //
1936 //
1937 // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__
1938 // CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[B:%.*]], i32* nonnull align 4 dereferenceable(4) [[C:%.*]], i32* [[D:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], %class.anon* nonnull align 8 dereferenceable(40) [[L:%.*]]) #[[ATTR1]] {
1939 // CHECK4-NEXT:  entry:
1940 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
1941 // CHECK4-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
1942 // CHECK4-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32*, align 8
1943 // CHECK4-NEXT:    [[B_ADDR:%.*]] = alloca i32*, align 8
1944 // CHECK4-NEXT:    [[C_ADDR:%.*]] = alloca i32*, align 8
1945 // CHECK4-NEXT:    [[D_ADDR:%.*]] = alloca i32*, align 8
1946 // CHECK4-NEXT:    [[A_ADDR:%.*]] = alloca i32*, align 8
1947 // CHECK4-NEXT:    [[L_ADDR:%.*]] = alloca %class.anon*, align 8
1948 // CHECK4-NEXT:    [[TMP:%.*]] = alloca i32*, align 8
1949 // CHECK4-NEXT:    [[_TMP1:%.*]] = alloca i32*, align 8
1950 // CHECK4-NEXT:    [[_TMP2:%.*]] = alloca %class.anon*, align 8
1951 // CHECK4-NEXT:    [[L3:%.*]] = alloca [[CLASS_ANON:%.*]], align 8
1952 // CHECK4-NEXT:    [[_TMP4:%.*]] = alloca %class.anon*, align 8
1953 // CHECK4-NEXT:    [[ARGC5:%.*]] = alloca i32, align 4
1954 // CHECK4-NEXT:    [[B6:%.*]] = alloca i32, align 4
1955 // CHECK4-NEXT:    [[_TMP7:%.*]] = alloca i32*, align 8
1956 // CHECK4-NEXT:    [[C8:%.*]] = alloca i32, align 4
1957 // CHECK4-NEXT:    [[_TMP9:%.*]] = alloca i32*, align 8
1958 // CHECK4-NEXT:    [[A10:%.*]] = alloca i32, align 4
1959 // CHECK4-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
1960 // CHECK4-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
1961 // CHECK4-NEXT:    store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8
1962 // CHECK4-NEXT:    store i32* [[B]], i32** [[B_ADDR]], align 8
1963 // CHECK4-NEXT:    store i32* [[C]], i32** [[C_ADDR]], align 8
1964 // CHECK4-NEXT:    store i32* [[D]], i32** [[D_ADDR]], align 8
1965 // CHECK4-NEXT:    store i32* [[A]], i32** [[A_ADDR]], align 8
1966 // CHECK4-NEXT:    store %class.anon* [[L]], %class.anon** [[L_ADDR]], align 8
1967 // CHECK4-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8
1968 // CHECK4-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[B_ADDR]], align 8
1969 // CHECK4-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[C_ADDR]], align 8
1970 // CHECK4-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[A_ADDR]], align 8
1971 // CHECK4-NEXT:    [[TMP4:%.*]] = load %class.anon*, %class.anon** [[L_ADDR]], align 8
1972 // CHECK4-NEXT:    store i32* [[TMP1]], i32** [[TMP]], align 8
1973 // CHECK4-NEXT:    store i32* [[TMP2]], i32** [[_TMP1]], align 8
1974 // CHECK4-NEXT:    store %class.anon* [[TMP4]], %class.anon** [[_TMP2]], align 8
1975 // CHECK4-NEXT:    [[TMP5:%.*]] = load %class.anon*, %class.anon** [[_TMP2]], align 8
1976 // CHECK4-NEXT:    [[TMP6:%.*]] = bitcast %class.anon* [[L3]] to i8*
1977 // CHECK4-NEXT:    [[TMP7:%.*]] = bitcast %class.anon* [[TMP5]] to i8*
1978 // CHECK4-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP6]], i8* align 8 [[TMP7]], i64 40, i1 false)
1979 // CHECK4-NEXT:    store %class.anon* [[L3]], %class.anon** [[_TMP4]], align 8
1980 // CHECK4-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP0]], align 4
1981 // CHECK4-NEXT:    store i32 [[TMP8]], i32* [[ARGC5]], align 4
1982 // CHECK4-NEXT:    [[TMP9:%.*]] = load i32*, i32** [[TMP]], align 8
1983 // CHECK4-NEXT:    [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4
1984 // CHECK4-NEXT:    store i32 [[TMP10]], i32* [[B6]], align 4
1985 // CHECK4-NEXT:    store i32* [[B6]], i32** [[_TMP7]], align 8
1986 // CHECK4-NEXT:    [[TMP11:%.*]] = load i32*, i32** [[_TMP1]], align 8
1987 // CHECK4-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4
1988 // CHECK4-NEXT:    store i32 [[TMP12]], i32* [[C8]], align 4
1989 // CHECK4-NEXT:    store i32* [[C8]], i32** [[_TMP9]], align 8
1990 // CHECK4-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP3]], align 4
1991 // CHECK4-NEXT:    store i32 [[TMP13]], i32* [[A10]], align 4
1992 // CHECK4-NEXT:    [[TMP14:%.*]] = load %class.anon*, %class.anon** [[_TMP4]], align 8
1993 // CHECK4-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP14]], i32 0, i32 0
1994 // CHECK4-NEXT:    store i32* [[ARGC5]], i32** [[TMP15]], align 8
1995 // CHECK4-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP14]], i32 0, i32 1
1996 // CHECK4-NEXT:    [[TMP17:%.*]] = load i32*, i32** [[_TMP7]], align 8
1997 // CHECK4-NEXT:    store i32* [[TMP17]], i32** [[TMP16]], align 8
1998 // CHECK4-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP14]], i32 0, i32 2
1999 // CHECK4-NEXT:    [[TMP19:%.*]] = load i32*, i32** [[_TMP9]], align 8
2000 // CHECK4-NEXT:    store i32* [[TMP19]], i32** [[TMP18]], align 8
2001 // CHECK4-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP14]], i32 0, i32 3
2002 // CHECK4-NEXT:    store i32** [[D_ADDR]], i32*** [[TMP20]], align 8
2003 // CHECK4-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[CLASS_ANON]], %class.anon* [[TMP14]], i32 0, i32 4
2004 // CHECK4-NEXT:    store i32* [[A10]], i32** [[TMP21]], align 8
2005 // CHECK4-NEXT:    [[TMP22:%.*]] = load %class.anon*, %class.anon** [[_TMP4]], align 8
2006 // CHECK4-NEXT:    [[CALL:%.*]] = call i64 @"_ZZ4mainENK3$_0clEv"(%class.anon* nonnull align 8 dereferenceable(40) [[TMP22]]) #[[ATTR7]]
2007 // CHECK4-NEXT:    ret void
2008 //
2009 //
2010 // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l27_worker
2011 // CHECK4-SAME: () #[[ATTR0]] {
2012 // CHECK4-NEXT:  entry:
2013 // CHECK4-NEXT:    [[WORK_FN:%.*]] = alloca i8*, align 8
2014 // CHECK4-NEXT:    [[EXEC_STATUS:%.*]] = alloca i8, align 1
2015 // CHECK4-NEXT:    store i8* null, i8** [[WORK_FN]], align 8
2016 // CHECK4-NEXT:    store i8 0, i8* [[EXEC_STATUS]], align 1
2017 // CHECK4-NEXT:    br label [[DOTAWAIT_WORK:%.*]]
2018 // CHECK4:       .await.work:
2019 // CHECK4-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
2020 // CHECK4-NEXT:    [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]])
2021 // CHECK4-NEXT:    [[TMP1:%.*]] = zext i1 [[TMP0]] to i8
2022 // CHECK4-NEXT:    store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1
2023 // CHECK4-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8
2024 // CHECK4-NEXT:    [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null
2025 // CHECK4-NEXT:    br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]]
2026 // CHECK4:       .select.workers:
2027 // CHECK4-NEXT:    [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1
2028 // CHECK4-NEXT:    [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0
2029 // CHECK4-NEXT:    br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]]
2030 // CHECK4:       .execute.parallel:
2031 // CHECK4-NEXT:    [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
2032 // CHECK4-NEXT:    [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)*
2033 // CHECK4-NEXT:    call void [[TMP5]](i16 0, i32 [[TMP4]])
2034 // CHECK4-NEXT:    br label [[DOTTERMINATE_PARALLEL:%.*]]
2035 // CHECK4:       .terminate.parallel:
2036 // CHECK4-NEXT:    call void @__kmpc_kernel_end_parallel()
2037 // CHECK4-NEXT:    br label [[DOTBARRIER_PARALLEL]]
2038 // CHECK4:       .barrier.parallel:
2039 // CHECK4-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
2040 // CHECK4-NEXT:    br label [[DOTAWAIT_WORK]]
2041 // CHECK4:       .exit:
2042 // CHECK4-NEXT:    ret void
2043 //
2044 //
2045 // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l27
2046 // CHECK4-SAME: (%struct.S* [[THIS:%.*]], %class.anon.0* nonnull align 8 dereferenceable(8) [[L:%.*]]) #[[ATTR1]] {
2047 // CHECK4-NEXT:  entry:
2048 // CHECK4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
2049 // CHECK4-NEXT:    [[L_ADDR:%.*]] = alloca %class.anon.0*, align 8
2050 // CHECK4-NEXT:    [[TMP:%.*]] = alloca %class.anon.0*, align 8
2051 // CHECK4-NEXT:    [[L7:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
2052 // CHECK4-NEXT:    [[_TMP8:%.*]] = alloca %class.anon.0*, align 8
2053 // CHECK4-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
2054 // CHECK4-NEXT:    store %class.anon.0* [[L]], %class.anon.0** [[L_ADDR]], align 8
2055 // CHECK4-NEXT:    [[TMP0:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
2056 // CHECK4-NEXT:    [[TMP1:%.*]] = load %class.anon.0*, %class.anon.0** [[L_ADDR]], align 8
2057 // CHECK4-NEXT:    store %class.anon.0* [[TMP1]], %class.anon.0** [[TMP]], align 8
2058 // CHECK4-NEXT:    [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
2059 // CHECK4-NEXT:    [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
2060 // CHECK4-NEXT:    [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
2061 // CHECK4-NEXT:    [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]]
2062 // CHECK4-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]]
2063 // CHECK4-NEXT:    br i1 [[TMP2]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]]
2064 // CHECK4:       .worker:
2065 // CHECK4-NEXT:    call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l27_worker() #[[ATTR6]]
2066 // CHECK4-NEXT:    br label [[DOTEXIT:%.*]]
2067 // CHECK4:       .mastercheck:
2068 // CHECK4-NEXT:    [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
2069 // CHECK4-NEXT:    [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
2070 // CHECK4-NEXT:    [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
2071 // CHECK4-NEXT:    [[TMP3:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1
2072 // CHECK4-NEXT:    [[TMP4:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1
2073 // CHECK4-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP3]], -1
2074 // CHECK4-NEXT:    [[MASTER_TID:%.*]] = and i32 [[TMP4]], [[TMP5]]
2075 // CHECK4-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]]
2076 // CHECK4-NEXT:    br i1 [[TMP6]], label [[DOTMASTER:%.*]], label [[DOTEXIT]]
2077 // CHECK4:       .master:
2078 // CHECK4-NEXT:    [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
2079 // CHECK4-NEXT:    [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize()
2080 // CHECK4-NEXT:    [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]]
2081 // CHECK4-NEXT:    call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1)
2082 // CHECK4-NEXT:    call void @__kmpc_data_sharing_init_stack()
2083 // CHECK4-NEXT:    [[TMP7:%.*]] = load %class.anon.0*, %class.anon.0** [[TMP]], align 8
2084 // CHECK4-NEXT:    [[TMP8:%.*]] = bitcast %class.anon.0* [[L7]] to i8*
2085 // CHECK4-NEXT:    [[TMP9:%.*]] = bitcast %class.anon.0* [[TMP7]] to i8*
2086 // CHECK4-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP8]], i8* align 8 [[TMP9]], i64 8, i1 false)
2087 // CHECK4-NEXT:    store %class.anon.0* [[L7]], %class.anon.0** [[_TMP8]], align 8
2088 // CHECK4-NEXT:    [[TMP10:%.*]] = load %class.anon.0*, %class.anon.0** [[_TMP8]], align 8
2089 // CHECK4-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[TMP10]], i32 0, i32 0
2090 // CHECK4-NEXT:    store %struct.S* [[TMP0]], %struct.S** [[TMP11]], align 8
2091 // CHECK4-NEXT:    [[TMP12:%.*]] = load %class.anon.0*, %class.anon.0** [[_TMP8]], align 8
2092 // CHECK4-NEXT:    [[CALL:%.*]] = call i32 @_ZZN1S3fooEvENKUlvE_clEv(%class.anon.0* nonnull align 8 dereferenceable(8) [[TMP12]]) #[[ATTR7]]
2093 // CHECK4-NEXT:    br label [[DOTTERMINATION_NOTIFIER:%.*]]
2094 // CHECK4:       .termination.notifier:
2095 // CHECK4-NEXT:    call void @__kmpc_kernel_deinit(i16 1)
2096 // CHECK4-NEXT:    call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0)
2097 // CHECK4-NEXT:    br label [[DOTEXIT]]
2098 // CHECK4:       .exit:
2099 // CHECK4-NEXT:    ret void
2100 //
2101 //
2102 // CHECK4-LABEL: define {{[^@]+}}@_ZZN1S3fooEvENKUlvE_clEv
2103 // CHECK4-SAME: (%class.anon.0* nonnull align 8 dereferenceable(8) [[THIS:%.*]]) #[[ATTR4:[0-9]+]] comdat align 2 {
2104 // CHECK4-NEXT:  entry:
2105 // CHECK4-NEXT:    [[THIS_ADDR:%.*]] = alloca %class.anon.0*, align 8
2106 // CHECK4-NEXT:    store %class.anon.0* [[THIS]], %class.anon.0** [[THIS_ADDR]], align 8
2107 // CHECK4-NEXT:    [[THIS1:%.*]] = load %class.anon.0*, %class.anon.0** [[THIS_ADDR]], align 8
2108 // CHECK4-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[CLASS_ANON_0:%.*]], %class.anon.0* [[THIS1]], i32 0, i32 0
2109 // CHECK4-NEXT:    [[TMP1:%.*]] = load %struct.S*, %struct.S** [[TMP0]], align 8
2110 // CHECK4-NEXT:    [[A:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[TMP1]], i32 0, i32 0
2111 // CHECK4-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
2112 // CHECK4-NEXT:    ret i32 [[TMP2]]
2113 //
2114 //
2115 // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN1S3fooEv_l29
2116 // CHECK4-SAME: (%struct.S* [[THIS:%.*]], %class.anon.0* nonnull align 8 dereferenceable(8) [[L:%.*]]) #[[ATTR1]] {
2117 // CHECK4-NEXT:  entry:
2118 // CHECK4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
2119 // CHECK4-NEXT:    [[L_ADDR:%.*]] = alloca %class.anon.0*, align 8
2120 // CHECK4-NEXT:    [[TMP:%.*]] = alloca %class.anon.0*, align 8
2121 // CHECK4-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8
2122 // CHECK4-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
2123 // CHECK4-NEXT:    store %class.anon.0* [[L]], %class.anon.0** [[L_ADDR]], align 8
2124 // CHECK4-NEXT:    [[TMP0:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
2125 // CHECK4-NEXT:    [[TMP1:%.*]] = load %class.anon.0*, %class.anon.0** [[L_ADDR]], align 8
2126 // CHECK4-NEXT:    store %class.anon.0* [[TMP1]], %class.anon.0** [[TMP]], align 8
2127 // CHECK4-NEXT:    [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
2128 // CHECK4-NEXT:    call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1)
2129 // CHECK4-NEXT:    call void @__kmpc_data_sharing_init_stack_spmd()
2130 // CHECK4-NEXT:    br label [[DOTEXECUTE:%.*]]
2131 // CHECK4:       .execute:
2132 // CHECK4-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
2133 // CHECK4-NEXT:    [[TMP3:%.*]] = load %class.anon.0*, %class.anon.0** [[TMP]], align 8
2134 // CHECK4-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
2135 // CHECK4-NEXT:    [[TMP5:%.*]] = bitcast %struct.S* [[TMP0]] to i8*
2136 // CHECK4-NEXT:    store i8* [[TMP5]], i8** [[TMP4]], align 8
2137 // CHECK4-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1
2138 // CHECK4-NEXT:    [[TMP7:%.*]] = bitcast %class.anon.0* [[TMP3]] to i8*
2139 // CHECK4-NEXT:    store i8* [[TMP7]], i8** [[TMP6]], align 8
2140 // CHECK4-NEXT:    [[TMP8:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
2141 // CHECK4-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %struct.S*, %class.anon.0*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP8]], i64 2)
2142 // CHECK4-NEXT:    br label [[DOTOMP_DEINIT:%.*]]
2143 // CHECK4:       .omp.deinit:
2144 // CHECK4-NEXT:    call void @__kmpc_spmd_kernel_deinit_v2(i16 1)
2145 // CHECK4-NEXT:    br label [[DOTEXIT:%.*]]
2146 // CHECK4:       .exit:
2147 // CHECK4-NEXT:    ret void
2148 //
2149 //
2150 // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__1
2151 // CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %struct.S* [[THIS:%.*]], %class.anon.0* nonnull align 8 dereferenceable(8) [[L:%.*]]) #[[ATTR1]] {
2152 // CHECK4-NEXT:  entry:
2153 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
2154 // CHECK4-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
2155 // CHECK4-NEXT:    [[THIS_ADDR:%.*]] = alloca %struct.S*, align 8
2156 // CHECK4-NEXT:    [[L_ADDR:%.*]] = alloca %class.anon.0*, align 8
2157 // CHECK4-NEXT:    [[TMP:%.*]] = alloca %class.anon.0*, align 8
2158 // CHECK4-NEXT:    [[L1:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
2159 // CHECK4-NEXT:    [[_TMP2:%.*]] = alloca %class.anon.0*, align 8
2160 // CHECK4-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
2161 // CHECK4-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
2162 // CHECK4-NEXT:    store %struct.S* [[THIS]], %struct.S** [[THIS_ADDR]], align 8
2163 // CHECK4-NEXT:    store %class.anon.0* [[L]], %class.anon.0** [[L_ADDR]], align 8
2164 // CHECK4-NEXT:    [[TMP0:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8
2165 // CHECK4-NEXT:    [[TMP1:%.*]] = load %class.anon.0*, %class.anon.0** [[L_ADDR]], align 8
2166 // CHECK4-NEXT:    store %class.anon.0* [[TMP1]], %class.anon.0** [[TMP]], align 8
2167 // CHECK4-NEXT:    [[TMP2:%.*]] = load %class.anon.0*, %class.anon.0** [[TMP]], align 8
2168 // CHECK4-NEXT:    [[TMP3:%.*]] = bitcast %class.anon.0* [[L1]] to i8*
2169 // CHECK4-NEXT:    [[TMP4:%.*]] = bitcast %class.anon.0* [[TMP2]] to i8*
2170 // CHECK4-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP3]], i8* align 8 [[TMP4]], i64 8, i1 false)
2171 // CHECK4-NEXT:    store %class.anon.0* [[L1]], %class.anon.0** [[_TMP2]], align 8
2172 // CHECK4-NEXT:    [[TMP5:%.*]] = load %class.anon.0*, %class.anon.0** [[_TMP2]], align 8
2173 // CHECK4-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[TMP5]], i32 0, i32 0
2174 // CHECK4-NEXT:    store %struct.S* [[TMP0]], %struct.S** [[TMP6]], align 8
2175 // CHECK4-NEXT:    [[TMP7:%.*]] = load %class.anon.0*, %class.anon.0** [[_TMP2]], align 8
2176 // CHECK4-NEXT:    [[CALL:%.*]] = call i32 @_ZZN1S3fooEvENKUlvE_clEv(%class.anon.0* nonnull align 8 dereferenceable(8) [[TMP7]]) #[[ATTR7]]
2177 // CHECK4-NEXT:    ret void
2178 //
2179 //
2180 // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooIZN1S3fooEvEUlvE_EiRKT__l18
2181 // CHECK4-SAME: (%class.anon.0* nonnull align 8 dereferenceable(8) [[T:%.*]]) #[[ATTR1]] {
2182 // CHECK4-NEXT:  entry:
2183 // CHECK4-NEXT:    [[T_ADDR:%.*]] = alloca %class.anon.0*, align 8
2184 // CHECK4-NEXT:    [[TMP:%.*]] = alloca %class.anon.0*, align 8
2185 // CHECK4-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
2186 // CHECK4-NEXT:    store %class.anon.0* [[T]], %class.anon.0** [[T_ADDR]], align 8
2187 // CHECK4-NEXT:    [[TMP0:%.*]] = load %class.anon.0*, %class.anon.0** [[T_ADDR]], align 8
2188 // CHECK4-NEXT:    store %class.anon.0* [[TMP0]], %class.anon.0** [[TMP]], align 8
2189 // CHECK4-NEXT:    [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x()
2190 // CHECK4-NEXT:    call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1)
2191 // CHECK4-NEXT:    call void @__kmpc_data_sharing_init_stack_spmd()
2192 // CHECK4-NEXT:    br label [[DOTEXECUTE:%.*]]
2193 // CHECK4:       .execute:
2194 // CHECK4-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
2195 // CHECK4-NEXT:    [[TMP2:%.*]] = load %class.anon.0*, %class.anon.0** [[TMP]], align 8
2196 // CHECK4-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
2197 // CHECK4-NEXT:    [[TMP4:%.*]] = bitcast %class.anon.0* [[TMP2]] to i8*
2198 // CHECK4-NEXT:    store i8* [[TMP4]], i8** [[TMP3]], align 8
2199 // CHECK4-NEXT:    [[TMP5:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
2200 // CHECK4-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, %class.anon.0*)* @__omp_outlined__2 to i8*), i8* null, i8** [[TMP5]], i64 1)
2201 // CHECK4-NEXT:    br label [[DOTOMP_DEINIT:%.*]]
2202 // CHECK4:       .omp.deinit:
2203 // CHECK4-NEXT:    call void @__kmpc_spmd_kernel_deinit_v2(i16 1)
2204 // CHECK4-NEXT:    br label [[DOTEXIT:%.*]]
2205 // CHECK4:       .exit:
2206 // CHECK4-NEXT:    ret void
2207 //
2208 //
2209 // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__2
2210 // CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], %class.anon.0* nonnull align 8 dereferenceable(8) [[T:%.*]]) #[[ATTR1]] {
2211 // CHECK4-NEXT:  entry:
2212 // CHECK4-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
2213 // CHECK4-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
2214 // CHECK4-NEXT:    [[T_ADDR:%.*]] = alloca %class.anon.0*, align 8
2215 // CHECK4-NEXT:    [[TMP:%.*]] = alloca %class.anon.0*, align 8
2216 // CHECK4-NEXT:    [[T1:%.*]] = alloca [[CLASS_ANON_0:%.*]], align 8
2217 // CHECK4-NEXT:    [[_TMP2:%.*]] = alloca %class.anon.0*, align 8
2218 // CHECK4-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
2219 // CHECK4-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
2220 // CHECK4-NEXT:    store %class.anon.0* [[T]], %class.anon.0** [[T_ADDR]], align 8
2221 // CHECK4-NEXT:    [[TMP0:%.*]] = load %class.anon.0*, %class.anon.0** [[T_ADDR]], align 8
2222 // CHECK4-NEXT:    store %class.anon.0* [[TMP0]], %class.anon.0** [[TMP]], align 8
2223 // CHECK4-NEXT:    [[TMP1:%.*]] = load %class.anon.0*, %class.anon.0** [[TMP]], align 8
2224 // CHECK4-NEXT:    [[TMP2:%.*]] = bitcast %class.anon.0* [[T1]] to i8*
2225 // CHECK4-NEXT:    [[TMP3:%.*]] = bitcast %class.anon.0* [[TMP1]] to i8*
2226 // CHECK4-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 8, i1 false)
2227 // CHECK4-NEXT:    store %class.anon.0* [[T1]], %class.anon.0** [[_TMP2]], align 8
2228 // CHECK4-NEXT:    [[TMP4:%.*]] = load %class.anon.0*, %class.anon.0** [[_TMP2]], align 8
2229 // CHECK4-NEXT:    [[TMP5:%.*]] = load %class.anon.0*, %class.anon.0** [[_TMP2]], align 8
2230 // CHECK4-NEXT:    [[CALL:%.*]] = call i32 @_ZZN1S3fooEvENKUlvE_clEv(%class.anon.0* nonnull align 8 dereferenceable(8) [[TMP5]]) #[[ATTR7]]
2231 // CHECK4-NEXT:    ret void
2232 //
2233