; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature
; RUN: opt -S -passes=openmp-opt-cgscc -aa-pipeline=basic-aa -openmp-hide-memory-transfer-latency < %s | FileCheck %s
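; The -openmp-hide-memory-transfer-latency optimization rewrites synchronous
; host-to-device transfers of the form
;   call void @__tgt_target_data_begin_mapper(...)
; into an asynchronous issue/wait pair
;   call void @__tgt_target_data_begin_mapper_issue(..., %struct.__tgt_async_info* %handle)
;   ; ... independent host code ...
;   call void @__tgt_target_data_begin_mapper_wait(i64 %device_id, %struct.__tgt_async_info* %handle)
; so the transfer can overlap with host computation. The CHECK lines below
; record where the issue/wait pair is (or is not yet) placed for each
; function; %handle and %device_id are illustrative names only.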
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"

; CHECK: %struct.__tgt_async_info = type { i8* }

%struct.ident_t = type { i32, i32, i32, i32, i8* }
%struct.__tgt_offload_entry = type { i8*, i8*, i64, i32, i32 }

@.offload_maptypes = private unnamed_addr constant [1 x i64] [i64 35]
@.__omp_offloading_heavyComputation1.region_id = weak constant i8 0
@.offload_sizes.1 = private unnamed_addr constant [1 x i64] [i64 8]
@.offload_maptypes.2 = private unnamed_addr constant [1 x i64] [i64 800]

@.__omp_offloading_heavyComputation2.region_id = weak constant i8 0
@.offload_maptypes.3 = private unnamed_addr constant [2 x i64] [i64 35, i64 35]

@.__omp_offloading_heavyComputation3.region_id = weak constant i8 0
@.offload_sizes.2 = private unnamed_addr constant [2 x i64] [i64 4, i64 0]
@.offload_maptypes.4 = private unnamed_addr constant [2 x i64] [i64 800, i64 544]

@.offload_maptypes.5 = private unnamed_addr constant [1 x i64] [i64 33]

@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 34, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str0, i32 0, i32 0) }, align 8
@.str0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1

;double heavyComputation1() {
;  double a = rand() % 777;
;  double random = rand();
;
;  //#pragma omp target data map(a)
;  void* args[1];
;  args[0] = &a;
;  __tgt_target_data_begin(..., args, ...)
;
;  #pragma omp target teams
;  for (int i = 0; i < 1000; ++i) {
;    a *= i*i / 2;
;  }
;
;  return random + a;
;}
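; For heavyComputation1 the transfer of 'a' is split: the issue call is
; emitted once the offload arrays are filled and the wait call right before
; the mapped value is first read on the host (see the CHECK lines below).
; The FIXMEs in the body mark where issue/wait could be placed to hide even
; more of the transfer latency.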
define dso_local double @heavyComputation1() {
; CHECK-LABEL: define {{[^@]+}}@heavyComputation1() {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[HANDLE:%.*]] = alloca [[STRUCT___TGT_ASYNC_INFO:%.*]], align 8
; CHECK-NEXT:    [[A:%.*]] = alloca double, align 8
; CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x i8*], align 8
; CHECK-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x i8*], align 8
; CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [1 x i8*], align 8
; CHECK-NEXT:    [[DOTOFFLOAD_PTRS5:%.*]] = alloca [1 x i8*], align 8
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[A]] to i8*
; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 (...) @rand()
; CHECK-NEXT:    [[REM:%.*]] = srem i32 [[CALL]], 777
; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[REM]] to double
; CHECK-NEXT:    store double [[CONV]], double* [[A]], align 8
; CHECK-NEXT:    [[CALL1:%.*]] = tail call i32 (...) @rand()
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i64 0, i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]] to double**
; CHECK-NEXT:    store double* [[A]], double** [[TMP2]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i64 0, i64 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast [1 x i8*]* [[DOTOFFLOAD_PTRS]] to double**
; CHECK-NEXT:    store double* [[A]], double** [[TMP4]], align 8
; CHECK-NEXT:    call void @__tgt_target_data_begin_mapper_issue(%struct.ident_t* @[[GLOB0:[0-9]+]], i64 -1, i32 1, i8** [[TMP1]], i8** [[TMP3]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.1, i64 0, i64 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i64 0, i64 0), i8** null, i8** null, %struct.__tgt_async_info* [[HANDLE]])
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[A]] to i64*
; CHECK-NEXT:    call void @__tgt_target_data_begin_mapper_wait(i64 -1, %struct.__tgt_async_info* [[HANDLE]])
; CHECK-NEXT:    [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i64 0, i64 0
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast [1 x i8*]* [[DOTOFFLOAD_BASEPTRS4]] to i64*
; CHECK-NEXT:    store i64 [[TMP6]], i64* [[TMP8]], align 8
; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS5]], i64 0, i64 0
; CHECK-NEXT:    [[TMP10:%.*]] = bitcast [1 x i8*]* [[DOTOFFLOAD_PTRS5]] to i64*
; CHECK-NEXT:    store i64 [[TMP6]], i64* [[TMP10]], align 8
; CHECK-NEXT:    [[TMP11:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB0]], i64 -1, i8* nonnull @.__omp_offloading_heavyComputation1.region_id, i32 1, i8** nonnull [[TMP7]], i8** nonnull [[TMP9]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.1, i64 0, i64 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.2, i64 0, i64 0), i8** null, i8** null, i32 0, i32 0)
; CHECK-NEXT:    [[DOTNOT:%.*]] = icmp eq i32 [[TMP11]], 0
; CHECK-NEXT:    br i1 [[DOTNOT]], label [[OMP_OFFLOAD_CONT:%.*]], label [[OMP_OFFLOAD_FAILED:%.*]]
; CHECK:       omp_offload.failed:
; CHECK-NEXT:    call void @heavyComputation1FallBack(i64 [[TMP6]])
; CHECK-NEXT:    br label [[OMP_OFFLOAD_CONT]]
; CHECK:       omp_offload.cont:
; CHECK-NEXT:    [[CONV2:%.*]] = sitofp i32 [[CALL1]] to double
; CHECK-NEXT:    call void @__tgt_target_data_end_mapper(%struct.ident_t* @[[GLOB0]], i64 -1, i32 1, i8** nonnull [[TMP1]], i8** nonnull [[TMP3]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.1, i64 0, i64 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i64 0, i64 0), i8** null, i8** null)
; CHECK-NEXT:    [[TMP12:%.*]] = load double, double* [[A]], align 8
; CHECK-NEXT:    [[ADD:%.*]] = fadd double [[TMP12]], [[CONV2]]
; CHECK-NEXT:    ret double [[ADD]]
;






entry:
  %a = alloca double, align 8
  %.offload_baseptrs = alloca [1 x i8*], align 8
  %.offload_ptrs = alloca [1 x i8*], align 8
  %.offload_baseptrs4 = alloca [1 x i8*], align 8
  %.offload_ptrs5 = alloca [1 x i8*], align 8

  %0 = bitcast double* %a to i8*
  %call = tail call i32 (...) @rand()
  %rem = srem i32 %call, 777
  %conv = sitofp i32 %rem to double
  store double %conv, double* %a, align 8

  ; FIXME: call to @__tgt_target_data_begin_mapper_issue(%struct.ident_t* @0, ...) should be moved here.
  %call1 = tail call i32 (...) @rand()

  %1 = getelementptr inbounds [1 x i8*], [1 x i8*]* %.offload_baseptrs, i64 0, i64 0
  %2 = bitcast [1 x i8*]* %.offload_baseptrs to double**
  store double* %a, double** %2, align 8
  %3 = getelementptr inbounds [1 x i8*], [1 x i8*]* %.offload_ptrs, i64 0, i64 0
  %4 = bitcast [1 x i8*]* %.offload_ptrs to double**
  store double* %a, double** %4, align 8
  call void @__tgt_target_data_begin_mapper(%struct.ident_t* @0, i64 -1, i32 1, i8** nonnull %1, i8** nonnull %3, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.1, i64 0, i64 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i64 0, i64 0), i8** null, i8** null)

  %5 = bitcast double* %a to i64*
  %6 = load i64, i64* %5, align 8
  %7 = getelementptr inbounds [1 x i8*], [1 x i8*]* %.offload_baseptrs4, i64 0, i64 0
  %8 = bitcast [1 x i8*]* %.offload_baseptrs4 to i64*
  store i64 %6, i64* %8, align 8
  %9 = getelementptr inbounds [1 x i8*], [1 x i8*]* %.offload_ptrs5, i64 0, i64 0
  %10 = bitcast [1 x i8*]* %.offload_ptrs5 to i64*
  store i64 %6, i64* %10, align 8

  ; FIXME: call to @__tgt_target_data_begin_mapper_wait(...) should be moved here.
  %11 = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @0, i64 -1, i8* nonnull @.__omp_offloading_heavyComputation1.region_id, i32 1, i8** nonnull %7, i8** nonnull %9, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.1, i64 0, i64 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.2, i64 0, i64 0), i8** null, i8** null, i32 0, i32 0)
  %.not = icmp eq i32 %11, 0
  br i1 %.not, label %omp_offload.cont, label %omp_offload.failed

omp_offload.failed:                               ; preds = %entry
  call void @heavyComputation1FallBack(i64 %6)
  br label %omp_offload.cont

omp_offload.cont:                                 ; preds = %omp_offload.failed, %entry
  %conv2 = sitofp i32 %call1 to double
  call void @__tgt_target_data_end_mapper(%struct.ident_t* @0, i64 -1, i32 1, i8** nonnull %1, i8** nonnull %3, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.1, i64 0, i64 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i64 0, i64 0), i8** null, i8** null)
  %12 = load double, double* %a, align 8
  %add = fadd double %12, %conv2
  ret double %add
}

define internal void @heavyComputation1FallBack(i64 %a) {
; CHECK-LABEL: define {{[^@]+}}@heavyComputation1FallBack
; CHECK-SAME: (i64 [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret void
;
entry:
  ; Fallback for offloading function heavyComputation1.
  ret void
}

;int heavyComputation2(double* a, unsigned size) {
;  int random = rand() % 7;
;
;  //#pragma omp target data map(a[0:size], size)
;  void* args[2];
;  args[0] = &a;
;  args[1] = &size;
;  __tgt_target_data_begin(..., args, ...)
;
;  #pragma omp target teams
;  for (int i = 0; i < size; ++i) {
;    a[i] = ++a[i] * 3.141624;
;  }
;
;  return random;
;}
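; For heavyComputation2 the begin mapper is currently left synchronous (the
; CHECK lines keep the plain @__tgt_target_data_begin_mapper call); the
; FIXME in the body marks where the corresponding wait would belong once the
; transfer is split.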
define dso_local i32 @heavyComputation2(double* %a, i32 %size) {
; CHECK-LABEL: define {{[^@]+}}@heavyComputation2
; CHECK-SAME: (double* [[A:%.*]], i32 [[SIZE:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SIZE_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 8
; CHECK-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 8
; CHECK-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [2 x i64], align 8
; CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [2 x i8*], align 8
; CHECK-NEXT:    [[DOTOFFLOAD_PTRS3:%.*]] = alloca [2 x i8*], align 8
; CHECK-NEXT:    store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4
; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 (...) @rand()
; CHECK-NEXT:    [[CONV:%.*]] = zext i32 [[SIZE]] to i64
; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw nsw i64 [[CONV]], 3
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i64 0, i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]] to double**
; CHECK-NEXT:    store double* [[A]], double** [[TMP2]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i64 0, i64 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast [2 x i8*]* [[DOTOFFLOAD_PTRS]] to double**
; CHECK-NEXT:    store double* [[A]], double** [[TMP4]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[DOTOFFLOAD_SIZES]], i64 0, i64 0
; CHECK-NEXT:    store i64 [[TMP0]], i64* [[TMP5]], align 8
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i64 0, i64 1
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32**
; CHECK-NEXT:    store i32* [[SIZE_ADDR]], i32** [[TMP7]], align 8
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i64 0, i64 1
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32**
; CHECK-NEXT:    store i32* [[SIZE_ADDR]], i32** [[TMP9]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[DOTOFFLOAD_SIZES]], i64 0, i64 1
; CHECK-NEXT:    store i64 4, i64* [[TMP10]], align 8
; CHECK-NEXT:    call void @__tgt_target_data_begin_mapper(%struct.ident_t* @[[GLOB0]], i64 -1, i32 2, i8** nonnull [[TMP1]], i8** nonnull [[TMP3]], i64* nonnull [[TMP5]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null, i8** null)
; CHECK-NEXT:    [[TMP11:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
; CHECK-NEXT:    [[SIZE_CASTED:%.*]] = zext i32 [[TMP11]] to i64
; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i64 0, i64 0
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast [2 x i8*]* [[DOTOFFLOAD_BASEPTRS2]] to i64*
; CHECK-NEXT:    store i64 [[SIZE_CASTED]], i64* [[TMP13]], align 8
; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS3]], i64 0, i64 0
; CHECK-NEXT:    [[TMP15:%.*]] = bitcast [2 x i8*]* [[DOTOFFLOAD_PTRS3]] to i64*
; CHECK-NEXT:    store i64 [[SIZE_CASTED]], i64* [[TMP15]], align 8
; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i64 0, i64 1
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to double**
; CHECK-NEXT:    store double* [[A]], double** [[TMP17]], align 8
; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS3]], i64 0, i64 1
; CHECK-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to double**
; CHECK-NEXT:    store double* [[A]], double** [[TMP19]], align 8
; CHECK-NEXT:    [[TMP20:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB0]], i64 -1, i8* nonnull @.__omp_offloading_heavyComputation2.region_id, i32 2, i8** nonnull [[TMP12]], i8** nonnull [[TMP14]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.2, i64 0, i64 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i64 0, i64 0), i8** null, i8** null, i32 0, i32 0)
; CHECK-NEXT:    [[DOTNOT:%.*]] = icmp eq i32 [[TMP20]], 0
; CHECK-NEXT:    br i1 [[DOTNOT]], label [[OMP_OFFLOAD_CONT:%.*]], label [[OMP_OFFLOAD_FAILED:%.*]]
; CHECK:       omp_offload.failed:
; CHECK-NEXT:    call void @heavyComputation2FallBack(i64 [[SIZE_CASTED]], double* [[A]])
; CHECK-NEXT:    br label [[OMP_OFFLOAD_CONT]]
; CHECK:       omp_offload.cont:
; CHECK-NEXT:    [[REM:%.*]] = srem i32 [[CALL]], 7
; CHECK-NEXT:    call void @__tgt_target_data_end_mapper(%struct.ident_t* @[[GLOB0]], i64 -1, i32 2, i8** nonnull [[TMP1]], i8** nonnull [[TMP3]], i64* nonnull [[TMP5]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null, i8** null)
; CHECK-NEXT:    ret i32 [[REM]]
;


entry:
  %size.addr = alloca i32, align 4
  %.offload_baseptrs = alloca [2 x i8*], align 8
  %.offload_ptrs = alloca [2 x i8*], align 8
  %.offload_sizes = alloca [2 x i64], align 8
  %.offload_baseptrs2 = alloca [2 x i8*], align 8
  %.offload_ptrs3 = alloca [2 x i8*], align 8

  store i32 %size, i32* %size.addr, align 4
  %call = tail call i32 (...) @rand()

  %conv = zext i32 %size to i64
  %0 = shl nuw nsw i64 %conv, 3
  %1 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_baseptrs, i64 0, i64 0
  %2 = bitcast [2 x i8*]* %.offload_baseptrs to double**
  store double* %a, double** %2, align 8
  %3 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_ptrs, i64 0, i64 0
  %4 = bitcast [2 x i8*]* %.offload_ptrs to double**
  store double* %a, double** %4, align 8
  %5 = getelementptr inbounds [2 x i64], [2 x i64]* %.offload_sizes, i64 0, i64 0
  store i64 %0, i64* %5, align 8
  %6 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_baseptrs, i64 0, i64 1
  %7 = bitcast i8** %6 to i32**
  store i32* %size.addr, i32** %7, align 8
  %8 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_ptrs, i64 0, i64 1
  %9 = bitcast i8** %8 to i32**
  store i32* %size.addr, i32** %9, align 8
  %10 = getelementptr inbounds [2 x i64], [2 x i64]* %.offload_sizes, i64 0, i64 1
  store i64 4, i64* %10, align 8
  call void @__tgt_target_data_begin_mapper(%struct.ident_t* @0, i64 -1, i32 2, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null, i8** null)

  %11 = load i32, i32* %size.addr, align 4
  %size.casted = zext i32 %11 to i64
  %12 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_baseptrs2, i64 0, i64 0
  %13 = bitcast [2 x i8*]* %.offload_baseptrs2 to i64*
  store i64 %size.casted, i64* %13, align 8
  %14 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_ptrs3, i64 0, i64 0
  %15 = bitcast [2 x i8*]* %.offload_ptrs3 to i64*
  store i64 %size.casted, i64* %15, align 8
  %16 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_baseptrs2, i64 0, i64 1
  %17 = bitcast i8** %16 to double**
  store double* %a, double** %17, align 8
  %18 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_ptrs3, i64 0, i64 1
  %19 = bitcast i8** %18 to double**
  store double* %a, double** %19, align 8

  ; FIXME: call to @__tgt_target_data_begin_mapper_wait(...) should be moved here.
  %20 = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @0, i64 -1, i8* nonnull @.__omp_offloading_heavyComputation2.region_id, i32 2, i8** nonnull %12, i8** nonnull %14, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.2, i64 0, i64 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i64 0, i64 0), i8** null, i8** null, i32 0, i32 0)
  %.not = icmp eq i32 %20, 0
  br i1 %.not, label %omp_offload.cont, label %omp_offload.failed

omp_offload.failed:                               ; preds = %entry
  call void @heavyComputation2FallBack(i64 %size.casted, double* %a)
  br label %omp_offload.cont

omp_offload.cont:                                 ; preds = %omp_offload.failed, %entry
  %rem = srem i32 %call, 7
  call void @__tgt_target_data_end_mapper(%struct.ident_t* @0, i64 -1, i32 2, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null, i8** null)
  ret i32 %rem
}

define internal void @heavyComputation2FallBack(i64 %size, double* %a) {
; CHECK-LABEL: define {{[^@]+}}@heavyComputation2FallBack
; CHECK-SAME: (i64 [[SIZE:%.*]], double* [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret void
;
entry:
  ; Fallback for offloading function heavyComputation2.
  ret void
}

;int heavyComputation3(double* restrict a, unsigned size) {
;  int random = rand() % 7;
;
;  //#pragma omp target data map(a[0:size], size)
;  void* args[2];
;  args[0] = &a;
;  args[1] = &size;
;  __tgt_target_data_begin(..., args, ...)
;
;  #pragma omp target teams
;  for (int i = 0; i < size; ++i) {
;    a[i] = ++a[i] * 3.141624;
;  }
;
;  return random;
;}
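; As with heavyComputation2, the begin mapper is currently left synchronous
; here; the FIXMEs in the body record where the issue/wait pair should
; eventually be placed.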
define dso_local i32 @heavyComputation3(double* noalias %a, i32 %size) {
; CHECK-LABEL: define {{[^@]+}}@heavyComputation3
; CHECK-SAME: (double* noalias [[A:%.*]], i32 [[SIZE:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[SIZE_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 8
; CHECK-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 8
; CHECK-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [2 x i64], align 8
; CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [2 x i8*], align 8
; CHECK-NEXT:    [[DOTOFFLOAD_PTRS3:%.*]] = alloca [2 x i8*], align 8
; CHECK-NEXT:    store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4
; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 (...) @rand()
; CHECK-NEXT:    [[CONV:%.*]] = zext i32 [[SIZE]] to i64
; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw nsw i64 [[CONV]], 3
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i64 0, i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]] to double**
; CHECK-NEXT:    store double* [[A]], double** [[TMP2]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i64 0, i64 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast [2 x i8*]* [[DOTOFFLOAD_PTRS]] to double**
; CHECK-NEXT:    store double* [[A]], double** [[TMP4]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[DOTOFFLOAD_SIZES]], i64 0, i64 0
; CHECK-NEXT:    store i64 [[TMP0]], i64* [[TMP5]], align 8
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i64 0, i64 1
; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32**
; CHECK-NEXT:    store i32* [[SIZE_ADDR]], i32** [[TMP7]], align 8
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i64 0, i64 1
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32**
; CHECK-NEXT:    store i32* [[SIZE_ADDR]], i32** [[TMP9]], align 8
; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[DOTOFFLOAD_SIZES]], i64 0, i64 1
; CHECK-NEXT:    store i64 4, i64* [[TMP10]], align 8
; CHECK-NEXT:    call void @__tgt_target_data_begin_mapper(%struct.ident_t* @[[GLOB0]], i64 -1, i32 2, i8** nonnull [[TMP1]], i8** nonnull [[TMP3]], i64* nonnull [[TMP5]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null, i8** null)
; CHECK-NEXT:    [[TMP11:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
; CHECK-NEXT:    [[SIZE_CASTED:%.*]] = zext i32 [[TMP11]] to i64
; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i64 0, i64 0
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast [2 x i8*]* [[DOTOFFLOAD_BASEPTRS2]] to i64*
; CHECK-NEXT:    store i64 [[SIZE_CASTED]], i64* [[TMP13]], align 8
; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS3]], i64 0, i64 0
; CHECK-NEXT:    [[TMP15:%.*]] = bitcast [2 x i8*]* [[DOTOFFLOAD_PTRS3]] to i64*
; CHECK-NEXT:    store i64 [[SIZE_CASTED]], i64* [[TMP15]], align 8
; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i64 0, i64 1
; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i8** [[TMP16]] to double**
; CHECK-NEXT:    store double* [[A]], double** [[TMP17]], align 8
; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS3]], i64 0, i64 1
; CHECK-NEXT:    [[TMP19:%.*]] = bitcast i8** [[TMP18]] to double**
; CHECK-NEXT:    store double* [[A]], double** [[TMP19]], align 8
; CHECK-NEXT:    [[TMP20:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB0]], i64 -1, i8* nonnull @.__omp_offloading_heavyComputation3.region_id, i32 2, i8** nonnull [[TMP12]], i8** nonnull [[TMP14]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.2, i64 0, i64 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i64 0, i64 0), i8** null, i8** null, i32 0, i32 0)
; CHECK-NEXT:    [[DOTNOT:%.*]] = icmp eq i32 [[TMP20]], 0
; CHECK-NEXT:    br i1 [[DOTNOT]], label [[OMP_OFFLOAD_CONT:%.*]], label [[OMP_OFFLOAD_FAILED:%.*]]
; CHECK:       omp_offload.failed:
; CHECK-NEXT:    call void @heavyComputation3FallBack(i64 [[SIZE_CASTED]], double* [[A]])
; CHECK-NEXT:    br label [[OMP_OFFLOAD_CONT]]
; CHECK:       omp_offload.cont:
; CHECK-NEXT:    [[REM:%.*]] = srem i32 [[CALL]], 7
; CHECK-NEXT:    call void @__tgt_target_data_end_mapper(%struct.ident_t* @[[GLOB0]], i64 -1, i32 2, i8** nonnull [[TMP1]], i8** nonnull [[TMP3]], i64* nonnull [[TMP5]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null, i8** null)
; CHECK-NEXT:    ret i32 [[REM]]
;


entry:
  %size.addr = alloca i32, align 4
  %.offload_baseptrs = alloca [2 x i8*], align 8
  %.offload_ptrs = alloca [2 x i8*], align 8
  %.offload_sizes = alloca [2 x i64], align 8
  %.offload_baseptrs2 = alloca [2 x i8*], align 8
  %.offload_ptrs3 = alloca [2 x i8*], align 8
  store i32 %size, i32* %size.addr, align 4

  ; FIXME: call to @__tgt_target_data_begin_mapper_issue(%struct.ident_t* @0, ...) should be moved here.
  %call = tail call i32 (...) @rand()

  %conv = zext i32 %size to i64
  %0 = shl nuw nsw i64 %conv, 3
  %1 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_baseptrs, i64 0, i64 0
  %2 = bitcast [2 x i8*]* %.offload_baseptrs to double**
  store double* %a, double** %2, align 8
  %3 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_ptrs, i64 0, i64 0
  %4 = bitcast [2 x i8*]* %.offload_ptrs to double**
  store double* %a, double** %4, align 8
  %5 = getelementptr inbounds [2 x i64], [2 x i64]* %.offload_sizes, i64 0, i64 0
  store i64 %0, i64* %5, align 8
  %6 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_baseptrs, i64 0, i64 1
  %7 = bitcast i8** %6 to i32**
  store i32* %size.addr, i32** %7, align 8
  %8 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_ptrs, i64 0, i64 1
  %9 = bitcast i8** %8 to i32**
  store i32* %size.addr, i32** %9, align 8
  %10 = getelementptr inbounds [2 x i64], [2 x i64]* %.offload_sizes, i64 0, i64 1
  store i64 4, i64* %10, align 8
  call void @__tgt_target_data_begin_mapper(%struct.ident_t* @0, i64 -1, i32 2, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null, i8** null)

  %11 = load i32, i32* %size.addr, align 4
  %size.casted = zext i32 %11 to i64
  %12 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_baseptrs2, i64 0, i64 0
  %13 = bitcast [2 x i8*]* %.offload_baseptrs2 to i64*
  store i64 %size.casted, i64* %13, align 8
  %14 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_ptrs3, i64 0, i64 0
  %15 = bitcast [2 x i8*]* %.offload_ptrs3 to i64*
  store i64 %size.casted, i64* %15, align 8
  %16 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_baseptrs2, i64 0, i64 1
  %17 = bitcast i8** %16 to double**
  store double* %a, double** %17, align 8
  %18 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_ptrs3, i64 0, i64 1
  %19 = bitcast i8** %18 to double**
  store double* %a, double** %19, align 8

  ; FIXME: call to @__tgt_target_data_begin_mapper_wait(...) should be moved here.
  %20 = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @0, i64 -1, i8* nonnull @.__omp_offloading_heavyComputation3.region_id, i32 2, i8** nonnull %12, i8** nonnull %14, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.2, i64 0, i64 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i64 0, i64 0), i8** null, i8** null, i32 0, i32 0)
  %.not = icmp eq i32 %20, 0
  br i1 %.not, label %omp_offload.cont, label %omp_offload.failed

omp_offload.failed:                               ; preds = %entry
  call void @heavyComputation3FallBack(i64 %size.casted, double* %a)
  br label %omp_offload.cont

omp_offload.cont:                                 ; preds = %omp_offload.failed, %entry
  %rem = srem i32 %call, 7
  call void @__tgt_target_data_end_mapper(%struct.ident_t* @0, i64 -1, i32 2, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null, i8** null)
  ret i32 %rem
}

define internal void @heavyComputation3FallBack(i64 %size, double* %a) {
; CHECK-LABEL: define {{[^@]+}}@heavyComputation3FallBack
; CHECK-SAME: (i64 [[SIZE:%.*]], double* [[A:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret void
;
entry:
  ; Fallback for offloading function heavyComputation3.
  ret void
}

;int dataTransferOnly1(double* restrict a, unsigned size) {
;  // Random computation.
;  int random = rand();
;
;  //#pragma omp target data map(to:a[0:size])
;  void* args[1];
;  args[0] = &a;
;  __tgt_target_data_begin(..., args, ...)
;
;  // Random computation.
;  random %= size;
;  return random;
;}
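; For dataTransferOnly1 the transfer is split: issue is emitted right after
; the offload arrays are set up and wait right before the region is unmapped,
; so the urem computation overlaps with the device transfer (see the CHECK
; lines below).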
define dso_local i32 @dataTransferOnly1(double* noalias %a, i32 %size) {
; CHECK-LABEL: define {{[^@]+}}@dataTransferOnly1
; CHECK-SAME: (double* noalias [[A:%.*]], i32 [[SIZE:%.*]]) {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[HANDLE:%.*]] = alloca [[STRUCT___TGT_ASYNC_INFO:%.*]], align 8
; CHECK-NEXT:    [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x i8*], align 8
; CHECK-NEXT:    [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x i8*], align 8
; CHECK-NEXT:    [[DOTOFFLOAD_SIZES:%.*]] = alloca [1 x i64], align 8
; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 (...) @rand()
; CHECK-NEXT:    [[CONV:%.*]] = zext i32 [[SIZE]] to i64
; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw nsw i64 [[CONV]], 3
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i64 0, i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]] to double**
; CHECK-NEXT:    store double* [[A]], double** [[TMP2]], align 8
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i64 0, i64 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast [1 x i8*]* [[DOTOFFLOAD_PTRS]] to double**
; CHECK-NEXT:    store double* [[A]], double** [[TMP4]], align 8
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [1 x i64], [1 x i64]* [[DOTOFFLOAD_SIZES]], i64 0, i64 0
; CHECK-NEXT:    store i64 [[TMP0]], i64* [[TMP5]], align 8
; CHECK-NEXT:    call void @__tgt_target_data_begin_mapper_issue(%struct.ident_t* @[[GLOB0]], i64 -1, i32 1, i8** [[TMP1]], i8** [[TMP3]], i64* [[TMP5]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i64 0, i64 0), i8** null, i8** null, %struct.__tgt_async_info* [[HANDLE]])
; CHECK-NEXT:    [[REM:%.*]] = urem i32 [[CALL]], [[SIZE]]
; CHECK-NEXT:    call void @__tgt_target_data_begin_mapper_wait(i64 -1, %struct.__tgt_async_info* [[HANDLE]])
; CHECK-NEXT:    call void @__tgt_target_data_end_mapper(%struct.ident_t* @[[GLOB0]], i64 -1, i32 1, i8** nonnull [[TMP1]], i8** nonnull [[TMP3]], i64* nonnull [[TMP5]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i64 0, i64 0), i8** null, i8** null)
; CHECK-NEXT:    ret i32 [[REM]]
;






entry:
  %.offload_baseptrs = alloca [1 x i8*], align 8
  %.offload_ptrs = alloca [1 x i8*], align 8
  %.offload_sizes = alloca [1 x i64], align 8

  ; FIXME: call to @__tgt_target_data_begin_mapper_issue(...) should be moved here.
  %call = tail call i32 (...) @rand()

  %conv = zext i32 %size to i64
  %0 = shl nuw nsw i64 %conv, 3
  %1 = getelementptr inbounds [1 x i8*], [1 x i8*]* %.offload_baseptrs, i64 0, i64 0
  %2 = bitcast [1 x i8*]* %.offload_baseptrs to double**
  store double* %a, double** %2, align 8
  %3 = getelementptr inbounds [1 x i8*], [1 x i8*]* %.offload_ptrs, i64 0, i64 0
  %4 = bitcast [1 x i8*]* %.offload_ptrs to double**
  store double* %a, double** %4, align 8
  %5 = getelementptr inbounds [1 x i64], [1 x i64]* %.offload_sizes, i64 0, i64 0
  store i64 %0, i64* %5, align 8
  call void @__tgt_target_data_begin_mapper(%struct.ident_t* @0, i64 -1, i32 1, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i64 0, i64 0), i8** null, i8** null)

  %rem = urem i32 %call, %size

  call void @__tgt_target_data_end_mapper(%struct.ident_t* @0, i64 -1, i32 1, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i64 0, i64 0), i8** null, i8** null)
  ret i32 %rem
}

declare void @__tgt_target_data_begin_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**)
declare i32 @__tgt_target_teams_mapper(%struct.ident_t*, i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i8**, i32, i32)
declare void @__tgt_target_data_end_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**)

declare dso_local i32 @rand(...)

; CHECK: declare void @__tgt_target_data_begin_mapper_issue(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**, %struct.__tgt_async_info*)
; CHECK: declare void @__tgt_target_data_begin_mapper_wait(i64, %struct.__tgt_async_info*)

!llvm.module.flags = !{!0}

!0 = !{i32 7, !"openmp", i32 50}
