; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature
; RUN: opt -S -passes=openmp-opt-cgscc -aa-pipeline=basic-aa -openmp-hide-memory-transfer-latency < %s | FileCheck %s
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"

; CHECK: %struct.__tgt_async_info = type { i8* }

%struct.ident_t = type { i32, i32, i32, i32, i8* }
%struct.__tgt_offload_entry = type { i8*, i8*, i64, i32, i32 }

@.offload_maptypes = private unnamed_addr constant [1 x i64] [i64 35]
@.__omp_offloading_heavyComputation1.region_id = weak constant i8 0
@.offload_sizes.1 = private unnamed_addr constant [1 x i64] [i64 8]
@.offload_maptypes.2 = private unnamed_addr constant [1 x i64] [i64 800]

@.__omp_offloading_heavyComputation2.region_id = weak constant i8 0
@.offload_maptypes.3 = private unnamed_addr constant [2 x i64] [i64 35, i64 35]

@.__omp_offloading_heavyComputation3.region_id = weak constant i8 0
@.offload_sizes.2 = private unnamed_addr constant [2 x i64] [i64 4, i64 0]
@.offload_maptypes.4 = private unnamed_addr constant [2 x i64] [i64 800, i64 544]

@.offload_maptypes.5 = private unnamed_addr constant [1 x i64] [i64 33]

@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 34, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str0, i32 0, i32 0) }, align 8
@.str0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1

;double heavyComputation1() {
;  double a = rand() % 777;
;  double random = rand();
;
;  //#pragma omp target data map(a)
;  void* args[1];
;  args[0] = &a;
;  __tgt_target_data_begin(..., args, ...)
;
;  #pragma omp target teams
;  for (int i = 0; i < 1000; ++i) {
;    a *= i*i / 2;
;  }
;
;  return random + a;
;}
define dso_local double @heavyComputation1() {
; CHECK-LABEL: define {{[^@]+}}@heavyComputation1() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[HANDLE:%.*]] = alloca [[STRUCT___TGT_ASYNC_INFO:%.*]], align 8
; CHECK-NEXT: [[A:%.*]] = alloca double, align 8
; CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x i8*], align 8
; CHECK-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x i8*], align 8
; CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS4:%.*]] = alloca [1 x i8*], align 8
; CHECK-NEXT: [[DOTOFFLOAD_PTRS5:%.*]] = alloca [1 x i8*], align 8
; CHECK-NEXT: [[TMP0:%.*]] = bitcast double* [[A]] to i8*
; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (...) @rand()
; CHECK-NEXT: [[REM:%.*]] = srem i32 [[CALL]], 777
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[REM]] to double
; CHECK-NEXT: store double [[CONV]], double* [[A]], align 8
; CHECK-NEXT: [[CALL1:%.*]] = tail call i32 (...) @rand()
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i64 0, i64 0
; CHECK-NEXT: [[TMP2:%.*]] = bitcast [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]] to double**
; CHECK-NEXT: store double* [[A]], double** [[TMP2]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i64 0, i64 0
; CHECK-NEXT: [[TMP4:%.*]] = bitcast [1 x i8*]* [[DOTOFFLOAD_PTRS]] to double**
; CHECK-NEXT: store double* [[A]], double** [[TMP4]], align 8
; CHECK-NEXT: call void @__tgt_target_data_begin_mapper_issue(%struct.ident_t* @[[GLOB0:[0-9]+]], i64 -1, i32 1, i8** [[TMP1]], i8** [[TMP3]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.1, i64 0, i64 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i64 0, i64 0), i8** null, i8** null, %struct.__tgt_async_info* [[HANDLE]])
; CHECK-NEXT: [[TMP5:%.*]] = bitcast double* [[A]] to i64*
; CHECK-NEXT: call void @__tgt_target_data_begin_mapper_wait(i64 -1, %struct.__tgt_async_info* [[HANDLE]])
; CHECK-NEXT: [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS4]], i64 0, i64 0
; CHECK-NEXT: [[TMP8:%.*]] = bitcast [1 x i8*]* [[DOTOFFLOAD_BASEPTRS4]] to i64*
; CHECK-NEXT: store i64 [[TMP6]], i64* [[TMP8]], align 8
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS5]], i64 0, i64 0
; CHECK-NEXT: [[TMP10:%.*]] = bitcast [1 x i8*]* [[DOTOFFLOAD_PTRS5]] to i64*
; CHECK-NEXT: store i64 [[TMP6]], i64* [[TMP10]], align 8
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB0]], i64 -1, i8* nonnull @.__omp_offloading_heavyComputation1.region_id, i32 1, i8** nonnull [[TMP7]], i8** nonnull [[TMP9]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.1, i64 0, i64 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.2, i64 0, i64 0), i8** null, i8** null, i32 0, i32 0)
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP11]], 0
; CHECK-NEXT: br i1 [[DOTNOT]], label [[OMP_OFFLOAD_CONT:%.*]], label [[OMP_OFFLOAD_FAILED:%.*]]
; CHECK: omp_offload.failed:
; CHECK-NEXT: call void @heavyComputation1FallBack(i64 [[TMP6]])
; CHECK-NEXT: br label [[OMP_OFFLOAD_CONT]]
; CHECK: omp_offload.cont:
; CHECK-NEXT: [[CONV2:%.*]] = sitofp i32 [[CALL1]] to double
; CHECK-NEXT: call void @__tgt_target_data_end_mapper(%struct.ident_t* @[[GLOB0]], i64 -1, i32 1, i8** nonnull [[TMP1]], i8** nonnull [[TMP3]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.1, i64 0, i64 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i64 0, i64 0), i8** null, i8** null)
; CHECK-NEXT: [[TMP12:%.*]] = load double, double* [[A]], align 8
; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP12]], [[CONV2]]
; CHECK-NEXT: ret double [[ADD]]
;

entry:
  %a = alloca double, align 8
  %.offload_baseptrs = alloca [1 x i8*], align 8
  %.offload_ptrs = alloca [1 x i8*], align 8
  %.offload_baseptrs4 = alloca [1 x i8*], align 8
  %.offload_ptrs5 = alloca [1 x i8*], align 8

  %0 = bitcast double* %a to i8*
  %call = tail call i32 (...) @rand()
  %rem = srem i32 %call, 777
  %conv = sitofp i32 %rem to double
  store double %conv, double* %a, align 8

  ; FIXME: call to @__tgt_target_data_begin_mapper_issue(%struct.ident_t* @0, ...) should be moved here.
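  ; The CHECK lines above show the expected rewrite: the pass splits the
  ; synchronous @__tgt_target_data_begin_mapper call below into an
  ; @__tgt_target_data_begin_mapper_issue / @__tgt_target_data_begin_mapper_wait
  ; pair, with the wait sunk to just before the first use of %a. Hoisting the
  ; issue above the rand() call that follows (as this FIXME asks) would let the
  ; transfer overlap with that independent host work as well.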
  %call1 = tail call i32 (...) @rand()

  %1 = getelementptr inbounds [1 x i8*], [1 x i8*]* %.offload_baseptrs, i64 0, i64 0
  %2 = bitcast [1 x i8*]* %.offload_baseptrs to double**
  store double* %a, double** %2, align 8
  %3 = getelementptr inbounds [1 x i8*], [1 x i8*]* %.offload_ptrs, i64 0, i64 0
  %4 = bitcast [1 x i8*]* %.offload_ptrs to double**
  store double* %a, double** %4, align 8
  call void @__tgt_target_data_begin_mapper(%struct.ident_t* @0, i64 -1, i32 1, i8** nonnull %1, i8** nonnull %3, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.1, i64 0, i64 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i64 0, i64 0), i8** null, i8** null)

  %5 = bitcast double* %a to i64*
  %6 = load i64, i64* %5, align 8
  %7 = getelementptr inbounds [1 x i8*], [1 x i8*]* %.offload_baseptrs4, i64 0, i64 0
  %8 = bitcast [1 x i8*]* %.offload_baseptrs4 to i64*
  store i64 %6, i64* %8, align 8
  %9 = getelementptr inbounds [1 x i8*], [1 x i8*]* %.offload_ptrs5, i64 0, i64 0
  %10 = bitcast [1 x i8*]* %.offload_ptrs5 to i64*
  store i64 %6, i64* %10, align 8

  ; FIXME: call to @__tgt_target_data_begin_mapper_wait(...) should be moved here.
  %11 = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @0, i64 -1, i8* nonnull @.__omp_offloading_heavyComputation1.region_id, i32 1, i8** nonnull %7, i8** nonnull %9, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.1, i64 0, i64 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.2, i64 0, i64 0), i8** null, i8** null, i32 0, i32 0)
  %.not = icmp eq i32 %11, 0
  br i1 %.not, label %omp_offload.cont, label %omp_offload.failed

omp_offload.failed:                               ; preds = %entry
  call void @heavyComputation1FallBack(i64 %6)
  br label %omp_offload.cont

omp_offload.cont:                                 ; preds = %omp_offload.failed, %entry
  %conv2 = sitofp i32 %call1 to double
  call void @__tgt_target_data_end_mapper(%struct.ident_t* @0, i64 -1, i32 1, i8** nonnull %1, i8** nonnull %3, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_sizes.1, i64 0, i64 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes, i64 0, i64 0), i8** null, i8** null)
  %12 = load double, double* %a, align 8
  %add = fadd double %12, %conv2
  ret double %add
}

define internal void @heavyComputation1FallBack(i64 %a) {
; CHECK-LABEL: define {{[^@]+}}@heavyComputation1FallBack
; CHECK-SAME: (i64 [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: ret void
;
entry:
  ; Fallback for offloading function heavyComputation1.
  ret void
}

;int heavyComputation2(double* a, unsigned size) {
;  int random = rand() % 7;
;
;  //#pragma omp target data map(a[0:size], size)
;  void* args[2];
;  args[0] = &a;
;  args[1] = &size;
;  __tgt_target_data_begin(..., args, ...)
;
;  #pragma omp target teams
;  for (int i = 0; i < size; ++i) {
;    a[i] = ++a[i] * 3.141624;
;  }
;
;  return random;
;}
define dso_local i32 @heavyComputation2(double* %a, i32 %size) {
; CHECK-LABEL: define {{[^@]+}}@heavyComputation2
; CHECK-SAME: (double* [[A:%.*]], i32 [[SIZE:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 8
; CHECK-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 8
; CHECK-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [2 x i64], align 8
; CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [2 x i8*], align 8
; CHECK-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [2 x i8*], align 8
; CHECK-NEXT: store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4
; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (...) @rand()
; CHECK-NEXT: [[CONV:%.*]] = zext i32 [[SIZE]] to i64
; CHECK-NEXT: [[TMP0:%.*]] = shl nuw nsw i64 [[CONV]], 3
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i64 0, i64 0
; CHECK-NEXT: [[TMP2:%.*]] = bitcast [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]] to double**
; CHECK-NEXT: store double* [[A]], double** [[TMP2]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i64 0, i64 0
; CHECK-NEXT: [[TMP4:%.*]] = bitcast [2 x i8*]* [[DOTOFFLOAD_PTRS]] to double**
; CHECK-NEXT: store double* [[A]], double** [[TMP4]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[DOTOFFLOAD_SIZES]], i64 0, i64 0
; CHECK-NEXT: store i64 [[TMP0]], i64* [[TMP5]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i64 0, i64 1
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32**
; CHECK-NEXT: store i32* [[SIZE_ADDR]], i32** [[TMP7]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i64 0, i64 1
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32**
; CHECK-NEXT: store i32* [[SIZE_ADDR]], i32** [[TMP9]], align 8
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[DOTOFFLOAD_SIZES]], i64 0, i64 1
; CHECK-NEXT: store i64 4, i64* [[TMP10]], align 8
; CHECK-NEXT: call void @__tgt_target_data_begin_mapper(%struct.ident_t* @[[GLOB0]], i64 -1, i32 2, i8** nonnull [[TMP1]], i8** nonnull [[TMP3]], i64* nonnull [[TMP5]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null, i8** null)
; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
; CHECK-NEXT: [[SIZE_CASTED:%.*]] = zext i32 [[TMP11]] to i64
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i64 0, i64 0
; CHECK-NEXT: [[TMP13:%.*]] = bitcast [2 x i8*]* [[DOTOFFLOAD_BASEPTRS2]] to i64*
; CHECK-NEXT: store i64 [[SIZE_CASTED]], i64* [[TMP13]], align 8
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS3]], i64 0, i64 0
; CHECK-NEXT: [[TMP15:%.*]] = bitcast [2 x i8*]* [[DOTOFFLOAD_PTRS3]] to i64*
; CHECK-NEXT: store i64 [[SIZE_CASTED]], i64* [[TMP15]], align 8
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i64 0, i64 1
; CHECK-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to double**
; CHECK-NEXT: store double* [[A]], double** [[TMP17]], align 8
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS3]], i64 0, i64 1
; CHECK-NEXT: [[TMP19:%.*]] = bitcast i8** [[TMP18]] to double**
; CHECK-NEXT: store double* [[A]], double** [[TMP19]], align 8
; CHECK-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB0]], i64 -1, i8* nonnull @.__omp_offloading_heavyComputation2.region_id, i32 2, i8** nonnull [[TMP12]], i8** nonnull [[TMP14]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.2, i64 0, i64 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i64 0, i64 0), i8** null, i8** null, i32 0, i32 0)
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP20]], 0
; CHECK-NEXT: br i1 [[DOTNOT]], label [[OMP_OFFLOAD_CONT:%.*]], label [[OMP_OFFLOAD_FAILED:%.*]]
; CHECK: omp_offload.failed:
; CHECK-NEXT: call void @heavyComputation2FallBack(i64 [[SIZE_CASTED]], double* [[A]])
; CHECK-NEXT: br label [[OMP_OFFLOAD_CONT]]
; CHECK: omp_offload.cont:
; CHECK-NEXT: [[REM:%.*]] = srem i32 [[CALL]], 7
; CHECK-NEXT: call void @__tgt_target_data_end_mapper(%struct.ident_t* @[[GLOB0]], i64 -1, i32 2, i8** nonnull [[TMP1]], i8** nonnull [[TMP3]], i64* nonnull [[TMP5]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null, i8** null)
; CHECK-NEXT: ret i32 [[REM]]
;

entry:
  %size.addr = alloca i32, align 4
  %.offload_baseptrs = alloca [2 x i8*], align 8
  %.offload_ptrs = alloca [2 x i8*], align 8
  %.offload_sizes = alloca [2 x i64], align 8
  %.offload_baseptrs2 = alloca [2 x i8*], align 8
  %.offload_ptrs3 = alloca [2 x i8*], align 8

  store i32 %size, i32* %size.addr, align 4
  %call = tail call i32 (...) @rand()

  %conv = zext i32 %size to i64
  %0 = shl nuw nsw i64 %conv, 3
  %1 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_baseptrs, i64 0, i64 0
  %2 = bitcast [2 x i8*]* %.offload_baseptrs to double**
  store double* %a, double** %2, align 8
  %3 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_ptrs, i64 0, i64 0
  %4 = bitcast [2 x i8*]* %.offload_ptrs to double**
  store double* %a, double** %4, align 8
  %5 = getelementptr inbounds [2 x i64], [2 x i64]* %.offload_sizes, i64 0, i64 0
  store i64 %0, i64* %5, align 8
  %6 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_baseptrs, i64 0, i64 1
  %7 = bitcast i8** %6 to i32**
  store i32* %size.addr, i32** %7, align 8
  %8 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_ptrs, i64 0, i64 1
  %9 = bitcast i8** %8 to i32**
  store i32* %size.addr, i32** %9, align 8
  %10 = getelementptr inbounds [2 x i64], [2 x i64]* %.offload_sizes, i64 0, i64 1
  store i64 4, i64* %10, align 8
  call void @__tgt_target_data_begin_mapper(%struct.ident_t* @0, i64 -1, i32 2, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null, i8** null)

  %11 = load i32, i32* %size.addr, align 4
  %size.casted = zext i32 %11 to i64
  %12 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_baseptrs2, i64 0, i64 0
  %13 = bitcast [2 x i8*]* %.offload_baseptrs2 to i64*
  store i64 %size.casted, i64* %13, align 8
  %14 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_ptrs3, i64 0, i64 0
  %15 = bitcast [2 x i8*]* %.offload_ptrs3 to i64*
  store i64 %size.casted, i64* %15, align 8
  %16 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_baseptrs2, i64 0, i64 1
  %17 = bitcast i8** %16 to double**
  store double* %a, double** %17, align 8
  %18 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_ptrs3, i64 0, i64 1
  %19 = bitcast i8** %18 to double**
  store double* %a, double** %19, align 8

  ; FIXME: call to @__tgt_target_data_begin_mapper_wait(...) should be moved here.
  %20 = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @0, i64 -1, i8* nonnull @.__omp_offloading_heavyComputation2.region_id, i32 2, i8** nonnull %12, i8** nonnull %14, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.2, i64 0, i64 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i64 0, i64 0), i8** null, i8** null, i32 0, i32 0)
  %.not = icmp eq i32 %20, 0
  br i1 %.not, label %omp_offload.cont, label %omp_offload.failed

omp_offload.failed:                               ; preds = %entry
  call void @heavyComputation2FallBack(i64 %size.casted, double* %a)
  br label %omp_offload.cont

omp_offload.cont:                                 ; preds = %omp_offload.failed, %entry
  %rem = srem i32 %call, 7
  call void @__tgt_target_data_end_mapper(%struct.ident_t* @0, i64 -1, i32 2, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null, i8** null)
  ret i32 %rem
}

define internal void @heavyComputation2FallBack(i64 %size, double* %a) {
; CHECK-LABEL: define {{[^@]+}}@heavyComputation2FallBack
; CHECK-SAME: (i64 [[SIZE:%.*]], double* [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: ret void
;
entry:
  ; Fallback for offloading function heavyComputation2.
  ret void
}

;int heavyComputation3(double* restrict a, unsigned size) {
;  int random = rand() % 7;
;
;  //#pragma omp target data map(a[0:size], size)
;  void* args[2];
;  args[0] = &a;
;  args[1] = &size;
;  __tgt_target_data_begin(..., args, ...)
;
;  #pragma omp target teams
;  for (int i = 0; i < size; ++i) {
;    a[i] = ++a[i] * 3.141624;
;  }
;
;  return random;
;}
define dso_local i32 @heavyComputation3(double* noalias %a, i32 %size) {
; CHECK-LABEL: define {{[^@]+}}@heavyComputation3
; CHECK-SAME: (double* noalias [[A:%.*]], i32 [[SIZE:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SIZE_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [2 x i8*], align 8
; CHECK-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [2 x i8*], align 8
; CHECK-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [2 x i64], align 8
; CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS2:%.*]] = alloca [2 x i8*], align 8
; CHECK-NEXT: [[DOTOFFLOAD_PTRS3:%.*]] = alloca [2 x i8*], align 8
; CHECK-NEXT: store i32 [[SIZE]], i32* [[SIZE_ADDR]], align 4
; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (...) @rand()
; CHECK-NEXT: [[CONV:%.*]] = zext i32 [[SIZE]] to i64
; CHECK-NEXT: [[TMP0:%.*]] = shl nuw nsw i64 [[CONV]], 3
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i64 0, i64 0
; CHECK-NEXT: [[TMP2:%.*]] = bitcast [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]] to double**
; CHECK-NEXT: store double* [[A]], double** [[TMP2]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i64 0, i64 0
; CHECK-NEXT: [[TMP4:%.*]] = bitcast [2 x i8*]* [[DOTOFFLOAD_PTRS]] to double**
; CHECK-NEXT: store double* [[A]], double** [[TMP4]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[DOTOFFLOAD_SIZES]], i64 0, i64 0
; CHECK-NEXT: store i64 [[TMP0]], i64* [[TMP5]], align 8
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i64 0, i64 1
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32**
; CHECK-NEXT: store i32* [[SIZE_ADDR]], i32** [[TMP7]], align 8
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS]], i64 0, i64 1
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8** [[TMP8]] to i32**
; CHECK-NEXT: store i32* [[SIZE_ADDR]], i32** [[TMP9]], align 8
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[DOTOFFLOAD_SIZES]], i64 0, i64 1
; CHECK-NEXT: store i64 4, i64* [[TMP10]], align 8
; CHECK-NEXT: call void @__tgt_target_data_begin_mapper(%struct.ident_t* @[[GLOB0]], i64 -1, i32 2, i8** nonnull [[TMP1]], i8** nonnull [[TMP3]], i64* nonnull [[TMP5]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null, i8** null)
; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[SIZE_ADDR]], align 4
; CHECK-NEXT: [[SIZE_CASTED:%.*]] = zext i32 [[TMP11]] to i64
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i64 0, i64 0
; CHECK-NEXT: [[TMP13:%.*]] = bitcast [2 x i8*]* [[DOTOFFLOAD_BASEPTRS2]] to i64*
; CHECK-NEXT: store i64 [[SIZE_CASTED]], i64* [[TMP13]], align 8
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS3]], i64 0, i64 0
; CHECK-NEXT: [[TMP15:%.*]] = bitcast [2 x i8*]* [[DOTOFFLOAD_PTRS3]] to i64*
; CHECK-NEXT: store i64 [[SIZE_CASTED]], i64* [[TMP15]], align 8
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_BASEPTRS2]], i64 0, i64 1
; CHECK-NEXT: [[TMP17:%.*]] = bitcast i8** [[TMP16]] to double**
; CHECK-NEXT: store double* [[A]], double** [[TMP17]], align 8
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOFFLOAD_PTRS3]], i64 0, i64 1
; CHECK-NEXT: [[TMP19:%.*]] = bitcast i8** [[TMP18]] to double**
; CHECK-NEXT: store double* [[A]], double** [[TMP19]], align 8
; CHECK-NEXT: [[TMP20:%.*]] = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @[[GLOB0]], i64 -1, i8* nonnull @.__omp_offloading_heavyComputation3.region_id, i32 2, i8** nonnull [[TMP12]], i8** nonnull [[TMP14]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.2, i64 0, i64 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i64 0, i64 0), i8** null, i8** null, i32 0, i32 0)
; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[TMP20]], 0
; CHECK-NEXT: br i1 [[DOTNOT]], label [[OMP_OFFLOAD_CONT:%.*]], label [[OMP_OFFLOAD_FAILED:%.*]]
; CHECK: omp_offload.failed:
; CHECK-NEXT: call void @heavyComputation3FallBack(i64 [[SIZE_CASTED]], double* [[A]])
; CHECK-NEXT: br label [[OMP_OFFLOAD_CONT]]
; CHECK: omp_offload.cont:
; CHECK-NEXT: [[REM:%.*]] = srem i32 [[CALL]], 7
; CHECK-NEXT: call void @__tgt_target_data_end_mapper(%struct.ident_t* @[[GLOB0]], i64 -1, i32 2, i8** nonnull [[TMP1]], i8** nonnull [[TMP3]], i64* nonnull [[TMP5]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null, i8** null)
; CHECK-NEXT: ret i32 [[REM]]
;

entry:
  %size.addr = alloca i32, align 4
  %.offload_baseptrs = alloca [2 x i8*], align 8
  %.offload_ptrs = alloca [2 x i8*], align 8
  %.offload_sizes = alloca [2 x i64], align 8
  %.offload_baseptrs2 = alloca [2 x i8*], align 8
  %.offload_ptrs3 = alloca [2 x i8*], align 8
  store i32 %size, i32* %size.addr, align 4

  ; FIXME: call to @__tgt_target_data_begin_mapper_issue(%struct.ident_t* @0, ...) should be moved here.
  %call = tail call i32 (...) @rand()

  %conv = zext i32 %size to i64
  %0 = shl nuw nsw i64 %conv, 3
  %1 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_baseptrs, i64 0, i64 0
  %2 = bitcast [2 x i8*]* %.offload_baseptrs to double**
  store double* %a, double** %2, align 8
  %3 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_ptrs, i64 0, i64 0
  %4 = bitcast [2 x i8*]* %.offload_ptrs to double**
  store double* %a, double** %4, align 8
  %5 = getelementptr inbounds [2 x i64], [2 x i64]* %.offload_sizes, i64 0, i64 0
  store i64 %0, i64* %5, align 8
  %6 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_baseptrs, i64 0, i64 1
  %7 = bitcast i8** %6 to i32**
  store i32* %size.addr, i32** %7, align 8
  %8 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_ptrs, i64 0, i64 1
  %9 = bitcast i8** %8 to i32**
  store i32* %size.addr, i32** %9, align 8
  %10 = getelementptr inbounds [2 x i64], [2 x i64]* %.offload_sizes, i64 0, i64 1
  store i64 4, i64* %10, align 8
  call void @__tgt_target_data_begin_mapper(%struct.ident_t* @0, i64 -1, i32 2, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null, i8** null)

  %11 = load i32, i32* %size.addr, align 4
  %size.casted = zext i32 %11 to i64
  %12 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_baseptrs2, i64 0, i64 0
  %13 = bitcast [2 x i8*]* %.offload_baseptrs2 to i64*
  store i64 %size.casted, i64* %13, align 8
  %14 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_ptrs3, i64 0, i64 0
  %15 = bitcast [2 x i8*]* %.offload_ptrs3 to i64*
  store i64 %size.casted, i64* %15, align 8
  %16 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_baseptrs2, i64 0, i64 1
  %17 = bitcast i8** %16 to double**
  store double* %a, double** %17, align 8
  %18 = getelementptr inbounds [2 x i8*], [2 x i8*]* %.offload_ptrs3, i64 0, i64 1
  %19 = bitcast i8** %18 to double**
  store double* %a, double** %19, align 8

  ; FIXME: call to @__tgt_target_data_begin_mapper_wait(...) should be moved here.
  %20 = call i32 @__tgt_target_teams_mapper(%struct.ident_t* @0, i64 -1, i8* nonnull @.__omp_offloading_heavyComputation3.region_id, i32 2, i8** nonnull %12, i8** nonnull %14, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_sizes.2, i64 0, i64 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.4, i64 0, i64 0), i8** null, i8** null, i32 0, i32 0)
  %.not = icmp eq i32 %20, 0
  br i1 %.not, label %omp_offload.cont, label %omp_offload.failed

omp_offload.failed:                               ; preds = %entry
  call void @heavyComputation3FallBack(i64 %size.casted, double* %a)
  br label %omp_offload.cont

omp_offload.cont:                                 ; preds = %omp_offload.failed, %entry
  %rem = srem i32 %call, 7
  call void @__tgt_target_data_end_mapper(%struct.ident_t* @0, i64 -1, i32 2, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([2 x i64], [2 x i64]* @.offload_maptypes.3, i64 0, i64 0), i8** null, i8** null)
  ret i32 %rem
}

define internal void @heavyComputation3FallBack(i64 %size, double* %a) {
; CHECK-LABEL: define {{[^@]+}}@heavyComputation3FallBack
; CHECK-SAME: (i64 [[SIZE:%.*]], double* [[A:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: ret void
;
entry:
  ; Fallback for offloading function heavyComputation3.
  ret void
}

;int dataTransferOnly1(double* restrict a, unsigned size) {
;  // Random computation.
;  int random = rand();
;
;  //#pragma omp target data map(to:a[0:size])
;  void* args[1];
;  args[0] = &a;
;  __tgt_target_data_begin(..., args, ...)
;
;  // Random computation.
;  random %= size;
;  return random;
;}
define dso_local i32 @dataTransferOnly1(double* noalias %a, i32 %size) {
; CHECK-LABEL: define {{[^@]+}}@dataTransferOnly1
; CHECK-SAME: (double* noalias [[A:%.*]], i32 [[SIZE:%.*]]) {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[HANDLE:%.*]] = alloca [[STRUCT___TGT_ASYNC_INFO:%.*]], align 8
; CHECK-NEXT: [[DOTOFFLOAD_BASEPTRS:%.*]] = alloca [1 x i8*], align 8
; CHECK-NEXT: [[DOTOFFLOAD_PTRS:%.*]] = alloca [1 x i8*], align 8
; CHECK-NEXT: [[DOTOFFLOAD_SIZES:%.*]] = alloca [1 x i64], align 8
; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (...) @rand()
; CHECK-NEXT: [[CONV:%.*]] = zext i32 [[SIZE]] to i64
; CHECK-NEXT: [[TMP0:%.*]] = shl nuw nsw i64 [[CONV]], 3
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]], i64 0, i64 0
; CHECK-NEXT: [[TMP2:%.*]] = bitcast [1 x i8*]* [[DOTOFFLOAD_BASEPTRS]] to double**
; CHECK-NEXT: store double* [[A]], double** [[TMP2]], align 8
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOFFLOAD_PTRS]], i64 0, i64 0
; CHECK-NEXT: [[TMP4:%.*]] = bitcast [1 x i8*]* [[DOTOFFLOAD_PTRS]] to double**
; CHECK-NEXT: store double* [[A]], double** [[TMP4]], align 8
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x i64], [1 x i64]* [[DOTOFFLOAD_SIZES]], i64 0, i64 0
; CHECK-NEXT: store i64 [[TMP0]], i64* [[TMP5]], align 8
; CHECK-NEXT: call void @__tgt_target_data_begin_mapper_issue(%struct.ident_t* @[[GLOB0]], i64 -1, i32 1, i8** [[TMP1]], i8** [[TMP3]], i64* [[TMP5]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i64 0, i64 0), i8** null, i8** null, %struct.__tgt_async_info* [[HANDLE]])
; CHECK-NEXT: [[REM:%.*]] = urem i32 [[CALL]], [[SIZE]]
; CHECK-NEXT: call void @__tgt_target_data_begin_mapper_wait(i64 -1, %struct.__tgt_async_info* [[HANDLE]])
; CHECK-NEXT: call void @__tgt_target_data_end_mapper(%struct.ident_t* @[[GLOB0]], i64 -1, i32 1, i8** nonnull [[TMP1]], i8** nonnull [[TMP3]], i64* nonnull [[TMP5]], i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i64 0, i64 0), i8** null, i8** null)
; CHECK-NEXT: ret i32 [[REM]]
;

entry:
  %.offload_baseptrs = alloca [1 x i8*], align 8
  %.offload_ptrs = alloca [1 x i8*], align 8
  %.offload_sizes = alloca [1 x i64], align 8

  ; FIXME: call to @__tgt_target_data_begin_mapper_issue(...) should be moved here.
  %call = tail call i32 (...) @rand()

  %conv = zext i32 %size to i64
  %0 = shl nuw nsw i64 %conv, 3
  %1 = getelementptr inbounds [1 x i8*], [1 x i8*]* %.offload_baseptrs, i64 0, i64 0
  %2 = bitcast [1 x i8*]* %.offload_baseptrs to double**
  store double* %a, double** %2, align 8
  %3 = getelementptr inbounds [1 x i8*], [1 x i8*]* %.offload_ptrs, i64 0, i64 0
  %4 = bitcast [1 x i8*]* %.offload_ptrs to double**
  store double* %a, double** %4, align 8
  %5 = getelementptr inbounds [1 x i64], [1 x i64]* %.offload_sizes, i64 0, i64 0
  store i64 %0, i64* %5, align 8
  call void @__tgt_target_data_begin_mapper(%struct.ident_t* @0, i64 -1, i32 1, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i64 0, i64 0), i8** null, i8** null)

  %rem = urem i32 %call, %size

  call void @__tgt_target_data_end_mapper(%struct.ident_t* @0, i64 -1, i32 1, i8** nonnull %1, i8** nonnull %3, i64* nonnull %5, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @.offload_maptypes.5, i64 0, i64 0), i8** null, i8** null)
  ret i32 %rem
}

declare void @__tgt_target_data_begin_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**)
declare i32 @__tgt_target_teams_mapper(%struct.ident_t*, i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i8**, i32, i32)
declare void @__tgt_target_data_end_mapper(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**)

declare dso_local i32 @rand(...)

; CHECK: declare void @__tgt_target_data_begin_mapper_issue(%struct.ident_t*, i64, i32, i8**, i8**, i64*, i64*, i8**, i8**, %struct.__tgt_async_info*)
; CHECK: declare void @__tgt_target_data_begin_mapper_wait(i64, %struct.__tgt_async_info*)

!llvm.module.flags = !{!0}

!0 = !{i32 7, !"openmp", i32 50}