1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ 2 // RUN: %clang_cc1 -disable-noundef-analysis -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc 3 // RUN: %clang_cc1 -disable-noundef-analysis -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK1 4 // RUN: %clang_cc1 -disable-noundef-analysis -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc 5 // RUN: %clang_cc1 -disable-noundef-analysis -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK2 6 // RUN: %clang_cc1 -disable-noundef-analysis -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fexceptions -fcxx-exceptions -aux-triple powerpc64le-unknown-unknown -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK3 7 // expected-no-diagnostics 8 #ifndef HEADER 9 #define HEADER 10 11 #include <complex> 12 13 // Verify we do not add tbaa metadata to type punned memory operations: 14 template <typename T> 15 void complex_reduction() { 16 #pragma omp target teams distribute 17 for (int ib = 0; ib < 100; ib++) { 18 std::complex<T> partial_sum; 19 const int istart = ib * 4; 20 const int iend = (ib + 1) * 4; 21 #pragma omp parallel for reduction(+ \ 22 : partial_sum) 23 for (int i = istart; i < iend; i++) 24 partial_sum += std::complex<T>(i, i); 25 } 26 } 27 28 void test() { 29 complex_reduction<float>(); 30 complex_reduction<double>(); 31 } 32 #endif 33 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16 34 // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { 35 // CHECK1-NEXT: entry: 36 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 37 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 38 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) 39 // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 40 // CHECK1-NEXT: br 
i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 41 // CHECK1: user_code.entry: 42 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 43 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 44 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8:![0-9]+]] 45 // CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]] 46 // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 47 // CHECK1-NEXT: ret void 48 // CHECK1: worker.exit: 49 // CHECK1-NEXT: ret void 50 // 51 // 52 // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ 53 // CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 54 // CHECK1-NEXT: entry: 55 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 56 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 57 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 58 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 59 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 60 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 61 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 62 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 63 // CHECK1-NEXT: [[IB:%.*]] = alloca i32, align 4 64 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca float, align 4 65 // CHECK1-NEXT: [[REF_TMP2:%.*]] = alloca float, align 4 66 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 67 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12:![0-9]+]] 68 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] 69 // CHECK1-NEXT: [[ISTART:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 4) 70 // CHECK1-NEXT: [[ISTART_ON_STACK:%.*]] = bitcast i8* [[ISTART]] to i32* 71 // CHECK1-NEXT: [[IEND:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 4) 72 // CHECK1-NEXT: [[IEND_ON_STACK:%.*]] = bitcast i8* [[IEND]] to i32* 73 // CHECK1-NEXT: [[PARTIAL_SUM:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 8) 74 // CHECK1-NEXT: [[PARTIAL_SUM_ON_STACK:%.*]] = bitcast i8* [[PARTIAL_SUM]] to %"class.std::complex"* 75 // CHECK1-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* 76 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) #[[ATTR5]] 77 // CHECK1-NEXT: [[TMP1:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* 78 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP1]]) #[[ATTR5]] 79 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 80 // CHECK1-NEXT: [[TMP2:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* 81 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP2]]) #[[ATTR5]] 82 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 83 // CHECK1-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* 84 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR5]] 85 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] 86 // CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* 87 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR5]] 88 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] 89 // CHECK1-NEXT: [[TMP5:%.*]] = bitcast i32* [[IB]] to i8* 90 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, 
i8* [[TMP5]]) #[[ATTR5]] 91 // CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 92 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA8]] 93 // CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 94 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 95 // CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 99 96 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 97 // CHECK1: cond.true: 98 // CHECK1-NEXT: br label [[COND_END:%.*]] 99 // CHECK1: cond.false: 100 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 101 // CHECK1-NEXT: br label [[COND_END]] 102 // CHECK1: cond.end: 103 // CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] 104 // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 105 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 106 // CHECK1-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 107 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 108 // CHECK1: omp.inner.for.cond: 109 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 110 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 111 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] 112 // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] 113 // CHECK1: omp.inner.for.cond.cleanup: 114 // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] 115 // CHECK1: omp.inner.for.body: 116 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 117 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 118 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 119 // CHECK1-NEXT: store i32 [[ADD]], i32* [[IB]], align 4, !tbaa [[TBAA8]] 120 // CHECK1-NEXT: [[TMP14:%.*]] = bitcast float* [[REF_TMP]] to i8* 121 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP14]]) #[[ATTR5]] 122 // CHECK1-NEXT: store float 0.000000e+00, float* [[REF_TMP]], align 4, !tbaa [[TBAA14:![0-9]+]] 123 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast float* [[REF_TMP2]] to i8* 124 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP15]]) #[[ATTR5]] 125 // CHECK1-NEXT: store float 0.000000e+00, float* [[REF_TMP2]], align 4, !tbaa [[TBAA14]] 126 // CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM_ON_STACK]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR10:[0-9]+]] 127 // CHECK1-NEXT: [[TMP16:%.*]] = bitcast float* [[REF_TMP2]] to i8* 128 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR5]] 129 // CHECK1-NEXT: [[TMP17:%.*]] = bitcast float* [[REF_TMP]] to i8* 130 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR5]] 131 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] 132 // CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP18]], 4 133 // CHECK1-NEXT: store i32 [[MUL3]], i32* [[ISTART_ON_STACK]], align 4, !tbaa [[TBAA8]] 134 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] 135 // 
CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 136 // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[ADD4]], 4 137 // CHECK1-NEXT: store i32 [[MUL5]], i32* [[IEND_ON_STACK]], align 4, !tbaa [[TBAA8]] 138 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 139 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[ISTART_ON_STACK]] to i8* 140 // CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8, !tbaa [[TBAA12]] 141 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 142 // CHECK1-NEXT: [[TMP23:%.*]] = bitcast i32* [[IEND_ON_STACK]] to i8* 143 // CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8, !tbaa [[TBAA12]] 144 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 145 // CHECK1-NEXT: [[TMP25:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM_ON_STACK]] to i8* 146 // CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8, !tbaa [[TBAA12]] 147 // CHECK1-NEXT: [[TMP26:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 148 // CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i32*, %"class.std::complex"*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP26]], i64 3) 149 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 150 // CHECK1: omp.body.continue: 151 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 152 // CHECK1: omp.inner.for.inc: 153 // CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 154 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP27]], 1 155 // CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 156 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] 157 // CHECK1: omp.inner.for.end: 158 // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 159 // CHECK1: omp.loop.exit: 160 // CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]]) 161 // CHECK1-NEXT: [[TMP28:%.*]] = bitcast i32* [[IB]] to i8* 162 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP28]]) #[[ATTR5]] 163 // CHECK1-NEXT: [[TMP29:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* 164 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP29]]) #[[ATTR5]] 165 // CHECK1-NEXT: [[TMP30:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* 166 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP30]]) #[[ATTR5]] 167 // CHECK1-NEXT: [[TMP31:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* 168 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP31]]) #[[ATTR5]] 169 // CHECK1-NEXT: [[TMP32:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* 170 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP32]]) #[[ATTR5]] 171 // CHECK1-NEXT: [[TMP33:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* 172 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR5]] 173 // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[PARTIAL_SUM]], i64 8) 174 // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[IEND]], i64 4) 175 // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[ISTART]], i64 4) 176 // CHECK1-NEXT: ret void 177 // 178 // 179 // CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIfEC1ERKfS2_ 180 // CHECK1-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]], float* nonnull align 4 dereferenceable(4) [[__RE:%.*]], 
float* nonnull align 4 dereferenceable(4) [[__IM:%.*]]) unnamed_addr #[[ATTR3:[0-9]+]] comdat align 2 { 181 // CHECK1-NEXT: entry: 182 // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 183 // CHECK1-NEXT: [[__RE_ADDR:%.*]] = alloca float*, align 8 184 // CHECK1-NEXT: [[__IM_ADDR:%.*]] = alloca float*, align 8 185 // CHECK1-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] 186 // CHECK1-NEXT: store float* [[__RE]], float** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] 187 // CHECK1-NEXT: store float* [[__IM]], float** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] 188 // CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 189 // CHECK1-NEXT: [[TMP0:%.*]] = load float*, float** [[__RE_ADDR]], align 8 190 // CHECK1-NEXT: [[TMP1:%.*]] = load float*, float** [[__IM_ADDR]], align 8 191 // CHECK1-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR10]] 192 // CHECK1-NEXT: ret void 193 // 194 // 195 // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 196 // CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ISTART:%.*]], i32* nonnull align 4 dereferenceable(4) [[IEND:%.*]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM:%.*]]) #[[ATTR0]] { 197 // CHECK1-NEXT: entry: 198 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 199 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 200 // CHECK1-NEXT: [[ISTART_ADDR:%.*]] = alloca i32*, align 8 201 // CHECK1-NEXT: [[IEND_ADDR:%.*]] = alloca i32*, align 8 202 // CHECK1-NEXT: [[PARTIAL_SUM_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 203 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 204 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 205 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 206 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 207 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 208 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 209 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 210 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 211 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 212 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 213 // CHECK1-NEXT: [[PARTIAL_SUM5:%.*]] = alloca %"class.std::complex", align 4 214 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca float, align 4 215 // CHECK1-NEXT: [[REF_TMP6:%.*]] = alloca float, align 4 216 // CHECK1-NEXT: [[I7:%.*]] = alloca i32, align 4 217 // CHECK1-NEXT: [[REF_TMP14:%.*]] = alloca %"class.std::complex", align 4 218 // CHECK1-NEXT: [[REF_TMP15:%.*]] = alloca float, align 4 219 // CHECK1-NEXT: [[REF_TMP16:%.*]] = alloca float, align 4 220 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 221 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12]] 222 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] 223 // CHECK1-NEXT: store i32* [[ISTART]], i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] 224 // CHECK1-NEXT: store i32* [[IEND]], i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] 225 // CHECK1-NEXT: store %"class.std::complex"* [[PARTIAL_SUM]], 
%"class.std::complex"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] 226 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] 227 // CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] 228 // CHECK1-NEXT: [[TMP2:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] 229 // CHECK1-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* 230 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR5]] 231 // CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* 232 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR5]] 233 // CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4, !tbaa [[TBAA8]] 234 // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] 235 // CHECK1-NEXT: [[TMP6:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* 236 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR5]] 237 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA8]] 238 // CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] 239 // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* 240 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR5]] 241 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] 242 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] 243 // CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP9]], [[TMP10]] 244 // CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 245 // CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 246 // CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 247 // CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 248 // CHECK1-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] 249 // CHECK1-NEXT: [[TMP11:%.*]] = bitcast i32* [[I]] to i8* 250 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP11]]) #[[ATTR5]] 251 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] 252 // CHECK1-NEXT: store i32 [[TMP12]], i32* [[I]], align 4, !tbaa [[TBAA8]] 253 // CHECK1-NEXT: [[TMP13:%.*]] = bitcast i32* [[I]] to i8* 254 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP13]]) #[[ATTR5]] 255 // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] 256 // CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] 257 // CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP14]], [[TMP15]] 258 // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 259 // CHECK1: omp.precond.then: 260 // CHECK1-NEXT: [[TMP16:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* 261 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR5]] 262 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 263 // CHECK1-NEXT: [[TMP17:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* 264 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR5]] 265 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] 266 // CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 267 // CHECK1-NEXT: [[TMP19:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* 268 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 
4, i8* [[TMP19]]) #[[ATTR5]] 269 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] 270 // CHECK1-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* 271 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR5]] 272 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] 273 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM5]] to i8* 274 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP21]]) #[[ATTR5]] 275 // CHECK1-NEXT: [[TMP22:%.*]] = bitcast float* [[REF_TMP]] to i8* 276 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP22]]) #[[ATTR5]] 277 // CHECK1-NEXT: store float 0.000000e+00, float* [[REF_TMP]], align 4, !tbaa [[TBAA14]] 278 // CHECK1-NEXT: [[TMP23:%.*]] = bitcast float* [[REF_TMP6]] to i8* 279 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP23]]) #[[ATTR5]] 280 // CHECK1-NEXT: store float 0.000000e+00, float* [[REF_TMP6]], align 4, !tbaa [[TBAA14]] 281 // CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR10]] 282 // CHECK1-NEXT: [[TMP24:%.*]] = bitcast float* [[REF_TMP6]] to i8* 283 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP24]]) #[[ATTR5]] 284 // CHECK1-NEXT: [[TMP25:%.*]] = bitcast float* [[REF_TMP]] to i8* 285 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP25]]) #[[ATTR5]] 286 // CHECK1-NEXT: [[TMP26:%.*]] = bitcast i32* [[I7]] to i8* 287 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP26]]) #[[ATTR5]] 288 // CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 289 // CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4, !tbaa [[TBAA8]] 290 // CHECK1-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP28]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 291 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] 292 // CHECK1: omp.dispatch.cond: 293 // CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 294 // CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] 295 // CHECK1-NEXT: [[CMP8:%.*]] = icmp ugt i32 [[TMP29]], [[TMP30]] 296 // CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 297 // CHECK1: cond.true: 298 // CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] 299 // CHECK1-NEXT: br label [[COND_END:%.*]] 300 // CHECK1: cond.false: 301 // CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 302 // CHECK1-NEXT: br label [[COND_END]] 303 // CHECK1: cond.end: 304 // CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE]] ], [ [[TMP32]], [[COND_FALSE]] ] 305 // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 306 // CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 307 // CHECK1-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 308 // CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 309 // CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 310 // CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP35]], 1 311 
// CHECK1-NEXT: [[CMP10:%.*]] = icmp ult i32 [[TMP34]], [[ADD9]] 312 // CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] 313 // CHECK1: omp.dispatch.cleanup: 314 // CHECK1-NEXT: br label [[OMP_DISPATCH_END:%.*]] 315 // CHECK1: omp.dispatch.body: 316 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 317 // CHECK1: omp.inner.for.cond: 318 // CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 319 // CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 320 // CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP37]], 1 321 // CHECK1-NEXT: [[CMP12:%.*]] = icmp ult i32 [[TMP36]], [[ADD11]] 322 // CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] 323 // CHECK1: omp.inner.for.cond.cleanup: 324 // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] 325 // CHECK1: omp.inner.for.body: 326 // CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] 327 // CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 328 // CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP39]], 1 329 // CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP38]], [[MUL]] 330 // CHECK1-NEXT: store i32 [[ADD13]], i32* [[I7]], align 4, !tbaa [[TBAA8]] 331 // CHECK1-NEXT: [[TMP40:%.*]] = bitcast %"class.std::complex"* [[REF_TMP14]] to i8* 332 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP40]]) #[[ATTR5]] 333 // CHECK1-NEXT: [[TMP41:%.*]] = bitcast float* [[REF_TMP15]] to i8* 334 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP41]]) #[[ATTR5]] 335 // CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] 336 // CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP42]] to float 337 // CHECK1-NEXT: store float [[CONV]], float* [[REF_TMP15]], align 4, !tbaa [[TBAA14]] 338 // CHECK1-NEXT: [[TMP43:%.*]] = bitcast float* [[REF_TMP16]] to i8* 339 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP43]]) #[[ATTR5]] 340 // CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] 341 // CHECK1-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP44]] to float 342 // CHECK1-NEXT: store float [[CONV17]], float* [[REF_TMP16]], align 4, !tbaa [[TBAA14]] 343 // CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]], float* nonnull align 4 dereferenceable(4) [[REF_TMP15]], float* nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR10]] 344 // CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR10]] 345 // CHECK1-NEXT: [[TMP45:%.*]] = bitcast float* [[REF_TMP16]] to i8* 346 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP45]]) #[[ATTR5]] 347 // CHECK1-NEXT: [[TMP46:%.*]] = bitcast float* [[REF_TMP15]] to i8* 348 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP46]]) #[[ATTR5]] 349 // CHECK1-NEXT: [[TMP47:%.*]] = bitcast %"class.std::complex"* [[REF_TMP14]] to i8* 350 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP47]]) #[[ATTR5]] 351 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 352 // CHECK1: omp.body.continue: 353 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 354 // CHECK1: omp.inner.for.inc: 355 // CHECK1-NEXT: 
[[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 356 // CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP48]], 1 357 // CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 358 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] 359 // CHECK1: omp.inner.for.end: 360 // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] 361 // CHECK1: omp.dispatch.inc: 362 // CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 363 // CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] 364 // CHECK1-NEXT: [[ADD19:%.*]] = add i32 [[TMP49]], [[TMP50]] 365 // CHECK1-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 366 // CHECK1-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 367 // CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] 368 // CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP51]], [[TMP52]] 369 // CHECK1-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 370 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] 371 // CHECK1: omp.dispatch.end: 372 // CHECK1-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 373 // CHECK1-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4, !tbaa [[TBAA8]] 374 // CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP54]]) 375 // CHECK1-NEXT: [[TMP55:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 376 // CHECK1-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4, !tbaa [[TBAA8]] 377 // CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 378 // CHECK1-NEXT: [[TMP58:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM5]] to i8* 379 // CHECK1-NEXT: store i8* [[TMP58]], i8** [[TMP57]], align 8 380 // CHECK1-NEXT: [[TMP59:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* 381 // CHECK1-NEXT: [[TMP60:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP56]], i32 1, i64 8, i8* [[TMP59]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func) 382 // CHECK1-NEXT: [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1 383 // CHECK1-NEXT: br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] 384 // CHECK1: .omp.reduction.then: 385 // CHECK1-NEXT: [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR10]] 386 // CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]]) 387 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] 388 // CHECK1: .omp.reduction.done: 389 // CHECK1-NEXT: [[TMP62:%.*]] = bitcast i32* [[I7]] to i8* 390 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR5]] 391 // CHECK1-NEXT: [[TMP63:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM5]] to i8* 392 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP63]]) #[[ATTR5]] 393 // CHECK1-NEXT: [[TMP64:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* 394 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR5]] 395 // CHECK1-NEXT: [[TMP65:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* 396 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) 
#[[ATTR5]] 397 // CHECK1-NEXT: [[TMP66:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* 398 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR5]] 399 // CHECK1-NEXT: [[TMP67:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* 400 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR5]] 401 // CHECK1-NEXT: br label [[OMP_PRECOND_END]] 402 // CHECK1: omp.precond.end: 403 // CHECK1-NEXT: [[TMP68:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* 404 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR5]] 405 // CHECK1-NEXT: [[TMP69:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* 406 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR5]] 407 // CHECK1-NEXT: [[TMP70:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* 408 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR5]] 409 // CHECK1-NEXT: [[TMP71:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* 410 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR5]] 411 // CHECK1-NEXT: ret void 412 // 413 // 414 // CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIfEpLIfEERS0_RKS_IT_E 415 // CHECK1-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[__C:%.*]]) #[[ATTR4:[0-9]+]] comdat align 2 { 416 // CHECK1-NEXT: entry: 417 // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 418 // CHECK1-NEXT: [[__C_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 419 // CHECK1-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] 420 // CHECK1-NEXT: store %"class.std::complex"* [[__C]], %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] 421 // CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 422 // CHECK1-NEXT: [[TMP0:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] 423 // CHECK1-NEXT: [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP0]]) #[[ATTR10]] 424 // CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0 425 // CHECK1-NEXT: [[TMP1:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA16:![0-9]+]] 426 // CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[CALL]] 427 // CHECK1-NEXT: store float [[ADD]], float* [[__RE_]], align 4, !tbaa [[TBAA16]] 428 // CHECK1-NEXT: [[TMP2:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] 429 // CHECK1-NEXT: [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]]) #[[ATTR10]] 430 // CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1 431 // CHECK1-NEXT: [[TMP3:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA18:![0-9]+]] 432 // CHECK1-NEXT: [[ADD3:%.*]] = fadd float [[TMP3]], [[CALL2]] 433 // CHECK1-NEXT: store float [[ADD3]], float* [[__IM_]], align 4, !tbaa [[TBAA18]] 434 // CHECK1-NEXT: ret %"class.std::complex"* [[THIS1]] 435 // 436 // 437 // CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func 438 // CHECK1-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR6:[0-9]+]] { 439 // CHECK1-NEXT: entry: 440 // 
CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 441 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 442 // CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 443 // CHECK1-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 444 // CHECK1-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x i8*], align 8 445 // CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca %"class.std::complex", align 4 446 // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] 447 // CHECK1-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2, !tbaa [[TBAA19:![0-9]+]] 448 // CHECK1-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2, !tbaa [[TBAA19]] 449 // CHECK1-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2, !tbaa [[TBAA19]] 450 // CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] 451 // CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]* 452 // CHECK1-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2, !tbaa [[TBAA19]] 453 // CHECK1-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2, !tbaa [[TBAA19]] 454 // CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2, !tbaa [[TBAA19]] 455 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 456 // CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 457 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 458 // CHECK1-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to %"class.std::complex"* 459 // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr %"class.std::complex", %"class.std::complex"* [[TMP12]], i64 1 460 // CHECK1-NEXT: [[TMP14:%.*]] = bitcast %"class.std::complex"* [[TMP13]] to i8* 461 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast %"class.std::complex"* [[TMP12]] to i64* 462 // CHECK1-NEXT: [[TMP16:%.*]] = bitcast %"class.std::complex"* [[DOTOMP_REDUCTION_ELEMENT]] to i64* 463 // CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP15]], align 4 464 // CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_get_warp_size() 465 // CHECK1-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16 466 // CHECK1-NEXT: [[TMP20:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP19]]) 467 // CHECK1-NEXT: store i64 [[TMP20]], i64* [[TMP16]], align 4 468 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i64, i64* [[TMP15]], i64 1 469 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr i64, i64* [[TMP16]], i64 1 470 // CHECK1-NEXT: [[TMP23:%.*]] = bitcast %"class.std::complex"* [[DOTOMP_REDUCTION_ELEMENT]] to i8* 471 // CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP11]], align 8, !tbaa [[TBAA12]] 472 // CHECK1-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 0 473 // CHECK1-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP8]], 1 474 // CHECK1-NEXT: [[TMP26:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] 475 // CHECK1-NEXT: [[TMP27:%.*]] = and i1 [[TMP25]], [[TMP26]] 476 // CHECK1-NEXT: [[TMP28:%.*]] = icmp eq i16 [[TMP8]], 2 477 // CHECK1-NEXT: [[TMP29:%.*]] = and i16 [[TMP6]], 1 478 // CHECK1-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP29]], 0 479 // CHECK1-NEXT: [[TMP31:%.*]] = and i1 [[TMP28]], [[TMP30]] 480 // CHECK1-NEXT: [[TMP32:%.*]] = icmp sgt i16 [[TMP7]], 0 481 // CHECK1-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] 482 // CHECK1-NEXT: [[TMP34:%.*]] = or i1 [[TMP24]], [[TMP27]] 483 // CHECK1-NEXT: [[TMP35:%.*]] = or i1 [[TMP34]], [[TMP33]] 484 // CHECK1-NEXT: br i1 [[TMP35]], label [[THEN:%.*]], label [[ELSE:%.*]] 485 // CHECK1: then: 486 // CHECK1-NEXT: 
[[TMP36:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* 487 // CHECK1-NEXT: [[TMP37:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* 488 // CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP36]], i8* [[TMP37]]) #[[ATTR5]] 489 // CHECK1-NEXT: br label [[IFCONT:%.*]] 490 // CHECK1: else: 491 // CHECK1-NEXT: br label [[IFCONT]] 492 // CHECK1: ifcont: 493 // CHECK1-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 494 // CHECK1-NEXT: [[TMP39:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] 495 // CHECK1-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] 496 // CHECK1-NEXT: br i1 [[TMP40]], label [[THEN4:%.*]], label [[ELSE5:%.*]] 497 // CHECK1: then4: 498 // CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 499 // CHECK1-NEXT: [[TMP42:%.*]] = load i8*, i8** [[TMP41]], align 8 500 // CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 501 // CHECK1-NEXT: [[TMP44:%.*]] = load i8*, i8** [[TMP43]], align 8 502 // CHECK1-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP42]] to %"class.std::complex"* 503 // CHECK1-NEXT: [[TMP46:%.*]] = bitcast i8* [[TMP44]] to %"class.std::complex"* 504 // CHECK1-NEXT: [[TMP47:%.*]] = bitcast %"class.std::complex"* [[TMP46]] to i8* 505 // CHECK1-NEXT: [[TMP48:%.*]] = bitcast %"class.std::complex"* [[TMP45]] to i8* 506 // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP47]], i8* align 4 [[TMP48]], i64 8, i1 false), !tbaa.struct !21 507 // CHECK1-NEXT: br label [[IFCONT6:%.*]] 508 // CHECK1: else5: 509 // CHECK1-NEXT: br label [[IFCONT6]] 510 // CHECK1: ifcont6: 511 // CHECK1-NEXT: ret void 512 // 513 // 514 // CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func 515 // CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR6]] { 516 // CHECK1-NEXT: entry: 517 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 518 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 519 // CHECK1-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4 520 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 521 // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] 522 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] 523 // CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 524 // CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 525 // CHECK1-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31 526 // CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 527 // CHECK1-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5 528 // CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8 529 // CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to [1 x i8*]* 530 // CHECK1-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] 531 // CHECK1-NEXT: br label [[PRECOND:%.*]] 532 // CHECK1: precond: 533 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] 534 // CHECK1-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 2 535 // CHECK1-NEXT: br i1 [[TMP9]], label [[BODY:%.*]], label [[EXIT:%.*]] 536 // CHECK1: body: 537 // CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP2]]) 538 // CHECK1-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 539 // CHECK1-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] 540 // CHECK1: then: 541 // CHECK1-NEXT: 
[[TMP10:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i64 0, i64 0 542 // CHECK1-NEXT: [[TMP11:%.*]] = load i8*, i8** [[TMP10]], align 8, !tbaa [[TBAA12]] 543 // CHECK1-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP11]] to i32* 544 // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 [[TMP8]] 545 // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] 546 // CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]], align 4 547 // CHECK1-NEXT: store volatile i32 [[TMP15]], i32 addrspace(3)* [[TMP14]], align 4 548 // CHECK1-NEXT: br label [[IFCONT:%.*]] 549 // CHECK1: else: 550 // CHECK1-NEXT: br label [[IFCONT]] 551 // CHECK1: ifcont: 552 // CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) 553 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] 554 // CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP16]] 555 // CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] 556 // CHECK1: then2: 557 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] 558 // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i64 0, i64 0 559 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8, !tbaa [[TBAA12]] 560 // CHECK1-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to i32* 561 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[TMP20]], i32 [[TMP8]] 562 // CHECK1-NEXT: [[TMP22:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP17]], align 4, !tbaa [[TBAA8]] 563 // CHECK1-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 4, !tbaa [[TBAA8]] 564 // CHECK1-NEXT: br label [[IFCONT4:%.*]] 565 // CHECK1: else3: 566 // CHECK1-NEXT: br label [[IFCONT4]] 567 // CHECK1: ifcont4: 568 // CHECK1-NEXT: [[TMP23:%.*]] = add nsw i32 [[TMP8]], 1 569 // CHECK1-NEXT: store i32 [[TMP23]], i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] 570 // CHECK1-NEXT: br label [[PRECOND]] 571 // CHECK1: exit: 572 // CHECK1-NEXT: ret void 573 // 574 // 575 // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper 576 // CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR6]] { 577 // CHECK1-NEXT: entry: 578 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 579 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 580 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 581 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 582 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA19]] 583 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] 584 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 585 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 586 // CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 587 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 588 // CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** 589 // CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8, !tbaa [[TBAA12]] 590 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 1 591 // CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32** 592 // CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8, !tbaa [[TBAA12]] 593 // CHECK1-NEXT: [[TMP9:%.*]] = 
getelementptr inbounds i8*, i8** [[TMP2]], i64 2 594 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to %"class.std::complex"** 595 // CHECK1-NEXT: [[TMP11:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[TMP10]], align 8, !tbaa [[TBAA12]] 596 // CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]], %"class.std::complex"* [[TMP11]]) #[[ATTR5]] 597 // CHECK1-NEXT: ret void 598 // 599 // 600 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16 601 // CHECK1-SAME: () #[[ATTR0]] { 602 // CHECK1-NEXT: entry: 603 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 604 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 605 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) 606 // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 607 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 608 // CHECK1: user_code.entry: 609 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 610 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 611 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8]] 612 // CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR5]] 613 // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 614 // CHECK1-NEXT: ret void 615 // CHECK1: worker.exit: 616 // CHECK1-NEXT: ret void 617 // 618 // 619 // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2 620 // CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 621 // CHECK1-NEXT: entry: 622 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 623 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 624 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 625 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 626 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 627 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 628 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 629 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 630 // CHECK1-NEXT: [[IB:%.*]] = alloca i32, align 4 631 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca double, align 8 632 // CHECK1-NEXT: [[REF_TMP2:%.*]] = alloca double, align 8 633 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 634 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12]] 635 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] 636 // CHECK1-NEXT: [[ISTART:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 4) 637 // CHECK1-NEXT: [[ISTART_ON_STACK:%.*]] = bitcast i8* [[ISTART]] to i32* 638 // CHECK1-NEXT: [[IEND:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 4) 639 // CHECK1-NEXT: [[IEND_ON_STACK:%.*]] = bitcast i8* [[IEND]] to i32* 640 // CHECK1-NEXT: [[PARTIAL_SUM:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 16) 641 // CHECK1-NEXT: [[PARTIAL_SUM_ON_STACK:%.*]] = bitcast i8* [[PARTIAL_SUM]] to %"class.std::complex.0"* 642 // CHECK1-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* 643 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) #[[ATTR5]] 644 // CHECK1-NEXT: [[TMP1:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* 
645 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP1]]) #[[ATTR5]] 646 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 647 // CHECK1-NEXT: [[TMP2:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* 648 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP2]]) #[[ATTR5]] 649 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 650 // CHECK1-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* 651 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR5]] 652 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] 653 // CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* 654 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR5]] 655 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] 656 // CHECK1-NEXT: [[TMP5:%.*]] = bitcast i32* [[IB]] to i8* 657 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP5]]) #[[ATTR5]] 658 // CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 659 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA8]] 660 // CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 661 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 662 // CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 99 663 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 664 // CHECK1: cond.true: 665 // CHECK1-NEXT: br label [[COND_END:%.*]] 666 // CHECK1: cond.false: 667 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 668 // CHECK1-NEXT: br label [[COND_END]] 669 // CHECK1: cond.end: 670 // CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] 671 // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 672 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 673 // CHECK1-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 674 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 675 // CHECK1: omp.inner.for.cond: 676 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 677 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 678 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] 679 // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] 680 // CHECK1: omp.inner.for.cond.cleanup: 681 // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] 682 // CHECK1: omp.inner.for.body: 683 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 684 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 685 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 686 // CHECK1-NEXT: store i32 [[ADD]], i32* [[IB]], align 4, !tbaa [[TBAA8]] 687 // CHECK1-NEXT: [[TMP14:%.*]] = bitcast double* [[REF_TMP]] to i8* 688 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP14]]) #[[ATTR5]] 689 // CHECK1-NEXT: store double 0.000000e+00, double* [[REF_TMP]], align 8, !tbaa [[TBAA22:![0-9]+]] 690 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast double* [[REF_TMP2]] to i8* 691 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* 
[[TMP15]]) #[[ATTR5]] 692 // CHECK1-NEXT: store double 0.000000e+00, double* [[REF_TMP2]], align 8, !tbaa [[TBAA22]] 693 // CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM_ON_STACK]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR10]] 694 // CHECK1-NEXT: [[TMP16:%.*]] = bitcast double* [[REF_TMP2]] to i8* 695 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP16]]) #[[ATTR5]] 696 // CHECK1-NEXT: [[TMP17:%.*]] = bitcast double* [[REF_TMP]] to i8* 697 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP17]]) #[[ATTR5]] 698 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] 699 // CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP18]], 4 700 // CHECK1-NEXT: store i32 [[MUL3]], i32* [[ISTART_ON_STACK]], align 4, !tbaa [[TBAA8]] 701 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] 702 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 703 // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[ADD4]], 4 704 // CHECK1-NEXT: store i32 [[MUL5]], i32* [[IEND_ON_STACK]], align 4, !tbaa [[TBAA8]] 705 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 706 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[ISTART_ON_STACK]] to i8* 707 // CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8, !tbaa [[TBAA12]] 708 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 709 // CHECK1-NEXT: [[TMP23:%.*]] = bitcast i32* [[IEND_ON_STACK]] to i8* 710 // CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8, !tbaa [[TBAA12]] 711 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 712 // CHECK1-NEXT: [[TMP25:%.*]] = bitcast %"class.std::complex.0"* [[PARTIAL_SUM_ON_STACK]] to i8* 713 // CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8, !tbaa [[TBAA12]] 714 // CHECK1-NEXT: [[TMP26:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 715 // CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i32*, %"class.std::complex.0"*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP26]], i64 3) 716 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 717 // CHECK1: omp.body.continue: 718 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 719 // CHECK1: omp.inner.for.inc: 720 // CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 721 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP27]], 1 722 // CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 723 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] 724 // CHECK1: omp.inner.for.end: 725 // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 726 // CHECK1: omp.loop.exit: 727 // CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]]) 728 // CHECK1-NEXT: [[TMP28:%.*]] = bitcast i32* [[IB]] to i8* 729 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP28]]) #[[ATTR5]] 730 // CHECK1-NEXT: [[TMP29:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* 731 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP29]]) #[[ATTR5]] 732 // CHECK1-NEXT: [[TMP30:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* 733 // 
CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP30]]) #[[ATTR5]] 734 // CHECK1-NEXT: [[TMP31:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* 735 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP31]]) #[[ATTR5]] 736 // CHECK1-NEXT: [[TMP32:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* 737 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP32]]) #[[ATTR5]] 738 // CHECK1-NEXT: [[TMP33:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* 739 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR5]] 740 // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[PARTIAL_SUM]], i64 16) 741 // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[IEND]], i64 4) 742 // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[ISTART]], i64 4) 743 // CHECK1-NEXT: ret void 744 // 745 // 746 // CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIdEC1ERKdS2_ 747 // CHECK1-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]], double* nonnull align 8 dereferenceable(8) [[__RE:%.*]], double* nonnull align 8 dereferenceable(8) [[__IM:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 { 748 // CHECK1-NEXT: entry: 749 // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 750 // CHECK1-NEXT: [[__RE_ADDR:%.*]] = alloca double*, align 8 751 // CHECK1-NEXT: [[__IM_ADDR:%.*]] = alloca double*, align 8 752 // CHECK1-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] 753 // CHECK1-NEXT: store double* [[__RE]], double** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] 754 // CHECK1-NEXT: store double* [[__IM]], double** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] 755 // CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 756 // CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[__RE_ADDR]], align 8 757 // CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[__IM_ADDR]], align 8 758 // CHECK1-NEXT: call void @_ZNSt7complexIdEC2ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS1]], double* nonnull align 8 dereferenceable(8) [[TMP0]], double* nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR10]] 759 // CHECK1-NEXT: ret void 760 // 761 // 762 // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3 763 // CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ISTART:%.*]], i32* nonnull align 4 dereferenceable(4) [[IEND:%.*]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM:%.*]]) #[[ATTR0]] { 764 // CHECK1-NEXT: entry: 765 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 766 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 767 // CHECK1-NEXT: [[ISTART_ADDR:%.*]] = alloca i32*, align 8 768 // CHECK1-NEXT: [[IEND_ADDR:%.*]] = alloca i32*, align 8 769 // CHECK1-NEXT: [[PARTIAL_SUM_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 770 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 771 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 772 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 773 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 774 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 775 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 776 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 777 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 778 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 779 // 
CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 780 // CHECK1-NEXT: [[PARTIAL_SUM5:%.*]] = alloca %"class.std::complex.0", align 8 781 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca double, align 8 782 // CHECK1-NEXT: [[REF_TMP6:%.*]] = alloca double, align 8 783 // CHECK1-NEXT: [[I7:%.*]] = alloca i32, align 4 784 // CHECK1-NEXT: [[REF_TMP14:%.*]] = alloca %"class.std::complex.0", align 8 785 // CHECK1-NEXT: [[REF_TMP15:%.*]] = alloca double, align 8 786 // CHECK1-NEXT: [[REF_TMP16:%.*]] = alloca double, align 8 787 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 788 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12]] 789 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] 790 // CHECK1-NEXT: store i32* [[ISTART]], i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] 791 // CHECK1-NEXT: store i32* [[IEND]], i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] 792 // CHECK1-NEXT: store %"class.std::complex.0"* [[PARTIAL_SUM]], %"class.std::complex.0"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] 793 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] 794 // CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] 795 // CHECK1-NEXT: [[TMP2:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] 796 // CHECK1-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* 797 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR5]] 798 // CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* 799 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR5]] 800 // CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4, !tbaa [[TBAA8]] 801 // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] 802 // CHECK1-NEXT: [[TMP6:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* 803 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR5]] 804 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA8]] 805 // CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] 806 // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* 807 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR5]] 808 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] 809 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] 810 // CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP9]], [[TMP10]] 811 // CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 812 // CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 813 // CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 814 // CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 815 // CHECK1-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] 816 // CHECK1-NEXT: [[TMP11:%.*]] = bitcast i32* [[I]] to i8* 817 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP11]]) #[[ATTR5]] 818 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] 819 // CHECK1-NEXT: store i32 [[TMP12]], i32* [[I]], align 4, !tbaa [[TBAA8]] 820 // CHECK1-NEXT: [[TMP13:%.*]] = bitcast i32* [[I]] to i8* 821 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP13]]) #[[ATTR5]] 822 // CHECK1-NEXT: [[TMP14:%.*]] = 
load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] 823 // CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] 824 // CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP14]], [[TMP15]] 825 // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 826 // CHECK1: omp.precond.then: 827 // CHECK1-NEXT: [[TMP16:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* 828 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR5]] 829 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 830 // CHECK1-NEXT: [[TMP17:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* 831 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR5]] 832 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] 833 // CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 834 // CHECK1-NEXT: [[TMP19:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* 835 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP19]]) #[[ATTR5]] 836 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] 837 // CHECK1-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* 838 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR5]] 839 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] 840 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast %"class.std::complex.0"* [[PARTIAL_SUM5]] to i8* 841 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP21]]) #[[ATTR5]] 842 // CHECK1-NEXT: [[TMP22:%.*]] = bitcast double* [[REF_TMP]] to i8* 843 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP22]]) #[[ATTR5]] 844 // CHECK1-NEXT: store double 0.000000e+00, double* [[REF_TMP]], align 8, !tbaa [[TBAA22]] 845 // CHECK1-NEXT: [[TMP23:%.*]] = bitcast double* [[REF_TMP6]] to i8* 846 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP23]]) #[[ATTR5]] 847 // CHECK1-NEXT: store double 0.000000e+00, double* [[REF_TMP6]], align 8, !tbaa [[TBAA22]] 848 // CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR10]] 849 // CHECK1-NEXT: [[TMP24:%.*]] = bitcast double* [[REF_TMP6]] to i8* 850 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP24]]) #[[ATTR5]] 851 // CHECK1-NEXT: [[TMP25:%.*]] = bitcast double* [[REF_TMP]] to i8* 852 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP25]]) #[[ATTR5]] 853 // CHECK1-NEXT: [[TMP26:%.*]] = bitcast i32* [[I7]] to i8* 854 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP26]]) #[[ATTR5]] 855 // CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 856 // CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4, !tbaa [[TBAA8]] 857 // CHECK1-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB3]], i32 [[TMP28]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 858 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] 859 // CHECK1: omp.dispatch.cond: 860 // CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 861 // CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] 862 // CHECK1-NEXT: [[CMP8:%.*]] = 
icmp ugt i32 [[TMP29]], [[TMP30]] 863 // CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 864 // CHECK1: cond.true: 865 // CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] 866 // CHECK1-NEXT: br label [[COND_END:%.*]] 867 // CHECK1: cond.false: 868 // CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 869 // CHECK1-NEXT: br label [[COND_END]] 870 // CHECK1: cond.end: 871 // CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE]] ], [ [[TMP32]], [[COND_FALSE]] ] 872 // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 873 // CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 874 // CHECK1-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 875 // CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 876 // CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 877 // CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP35]], 1 878 // CHECK1-NEXT: [[CMP10:%.*]] = icmp ult i32 [[TMP34]], [[ADD9]] 879 // CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] 880 // CHECK1: omp.dispatch.cleanup: 881 // CHECK1-NEXT: br label [[OMP_DISPATCH_END:%.*]] 882 // CHECK1: omp.dispatch.body: 883 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 884 // CHECK1: omp.inner.for.cond: 885 // CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 886 // CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 887 // CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP37]], 1 888 // CHECK1-NEXT: [[CMP12:%.*]] = icmp ult i32 [[TMP36]], [[ADD11]] 889 // CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] 890 // CHECK1: omp.inner.for.cond.cleanup: 891 // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] 892 // CHECK1: omp.inner.for.body: 893 // CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] 894 // CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 895 // CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP39]], 1 896 // CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP38]], [[MUL]] 897 // CHECK1-NEXT: store i32 [[ADD13]], i32* [[I7]], align 4, !tbaa [[TBAA8]] 898 // CHECK1-NEXT: [[TMP40:%.*]] = bitcast %"class.std::complex.0"* [[REF_TMP14]] to i8* 899 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP40]]) #[[ATTR5]] 900 // CHECK1-NEXT: [[TMP41:%.*]] = bitcast double* [[REF_TMP15]] to i8* 901 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP41]]) #[[ATTR5]] 902 // CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] 903 // CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP42]] to double 904 // CHECK1-NEXT: store double [[CONV]], double* [[REF_TMP15]], align 8, !tbaa [[TBAA22]] 905 // CHECK1-NEXT: [[TMP43:%.*]] = bitcast double* [[REF_TMP16]] to i8* 906 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP43]]) #[[ATTR5]] 907 // CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] 908 // CHECK1-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP44]] to double 909 // CHECK1-NEXT: store double [[CONV17]], double* [[REF_TMP16]], align 8, !tbaa [[TBAA22]] 910 // CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]], double* nonnull 
align 8 dereferenceable(8) [[REF_TMP15]], double* nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR10]] 911 // CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR10]] 912 // CHECK1-NEXT: [[TMP45:%.*]] = bitcast double* [[REF_TMP16]] to i8* 913 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP45]]) #[[ATTR5]] 914 // CHECK1-NEXT: [[TMP46:%.*]] = bitcast double* [[REF_TMP15]] to i8* 915 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP46]]) #[[ATTR5]] 916 // CHECK1-NEXT: [[TMP47:%.*]] = bitcast %"class.std::complex.0"* [[REF_TMP14]] to i8* 917 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP47]]) #[[ATTR5]] 918 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 919 // CHECK1: omp.body.continue: 920 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 921 // CHECK1: omp.inner.for.inc: 922 // CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 923 // CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP48]], 1 924 // CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 925 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] 926 // CHECK1: omp.inner.for.end: 927 // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] 928 // CHECK1: omp.dispatch.inc: 929 // CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 930 // CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] 931 // CHECK1-NEXT: [[ADD19:%.*]] = add i32 [[TMP49]], [[TMP50]] 932 // CHECK1-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 933 // CHECK1-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 934 // CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] 935 // CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP51]], [[TMP52]] 936 // CHECK1-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 937 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] 938 // CHECK1: omp.dispatch.end: 939 // CHECK1-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 940 // CHECK1-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4, !tbaa [[TBAA8]] 941 // CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP54]]) 942 // CHECK1-NEXT: [[TMP55:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 943 // CHECK1-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4, !tbaa [[TBAA8]] 944 // CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 945 // CHECK1-NEXT: [[TMP58:%.*]] = bitcast %"class.std::complex.0"* [[PARTIAL_SUM5]] to i8* 946 // CHECK1-NEXT: store i8* [[TMP58]], i8** [[TMP57]], align 8 947 // CHECK1-NEXT: [[TMP59:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* 948 // CHECK1-NEXT: [[TMP60:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP56]], i32 1, i64 8, i8* [[TMP59]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func5, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func6) 949 // CHECK1-NEXT: [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1 950 // CHECK1-NEXT: br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] 951 // CHECK1: 
.omp.reduction.then: 952 // CHECK1-NEXT: [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR10]] 953 // CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]]) 954 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] 955 // CHECK1: .omp.reduction.done: 956 // CHECK1-NEXT: [[TMP62:%.*]] = bitcast i32* [[I7]] to i8* 957 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR5]] 958 // CHECK1-NEXT: [[TMP63:%.*]] = bitcast %"class.std::complex.0"* [[PARTIAL_SUM5]] to i8* 959 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP63]]) #[[ATTR5]] 960 // CHECK1-NEXT: [[TMP64:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* 961 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR5]] 962 // CHECK1-NEXT: [[TMP65:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* 963 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR5]] 964 // CHECK1-NEXT: [[TMP66:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* 965 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR5]] 966 // CHECK1-NEXT: [[TMP67:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* 967 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR5]] 968 // CHECK1-NEXT: br label [[OMP_PRECOND_END]] 969 // CHECK1: omp.precond.end: 970 // CHECK1-NEXT: [[TMP68:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* 971 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR5]] 972 // CHECK1-NEXT: [[TMP69:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* 973 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR5]] 974 // CHECK1-NEXT: [[TMP70:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* 975 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR5]] 976 // CHECK1-NEXT: [[TMP71:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* 977 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR5]] 978 // CHECK1-NEXT: ret void 979 // 980 // 981 // CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIdEpLIdEERS0_RKS_IT_E 982 // CHECK1-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[__C:%.*]]) #[[ATTR4]] comdat align 2 { 983 // CHECK1-NEXT: entry: 984 // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 985 // CHECK1-NEXT: [[__C_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 986 // CHECK1-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] 987 // CHECK1-NEXT: store %"class.std::complex.0"* [[__C]], %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] 988 // CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 989 // CHECK1-NEXT: [[TMP0:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] 990 // CHECK1-NEXT: [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP0]]) #[[ATTR10]] 991 // CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 0 992 // CHECK1-NEXT: [[TMP1:%.*]] = load double, double* [[__RE_]], 
align 8, !tbaa [[TBAA24:![0-9]+]] 993 // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[CALL]] 994 // CHECK1-NEXT: store double [[ADD]], double* [[__RE_]], align 8, !tbaa [[TBAA24]] 995 // CHECK1-NEXT: [[TMP2:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] 996 // CHECK1-NEXT: [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]]) #[[ATTR10]] 997 // CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 1 998 // CHECK1-NEXT: [[TMP3:%.*]] = load double, double* [[__IM_]], align 8, !tbaa [[TBAA26:![0-9]+]] 999 // CHECK1-NEXT: [[ADD3:%.*]] = fadd double [[TMP3]], [[CALL2]] 1000 // CHECK1-NEXT: store double [[ADD3]], double* [[__IM_]], align 8, !tbaa [[TBAA26]] 1001 // CHECK1-NEXT: ret %"class.std::complex.0"* [[THIS1]] 1002 // 1003 // 1004 // CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func5 1005 // CHECK1-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR6]] { 1006 // CHECK1-NEXT: entry: 1007 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 1008 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 1009 // CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 1010 // CHECK1-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 1011 // CHECK1-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x i8*], align 8 1012 // CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca %"class.std::complex.0", align 8 1013 // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] 1014 // CHECK1-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2, !tbaa [[TBAA19]] 1015 // CHECK1-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2, !tbaa [[TBAA19]] 1016 // CHECK1-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2, !tbaa [[TBAA19]] 1017 // CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] 1018 // CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]* 1019 // CHECK1-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2, !tbaa [[TBAA19]] 1020 // CHECK1-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2, !tbaa [[TBAA19]] 1021 // CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2, !tbaa [[TBAA19]] 1022 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 1023 // CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 1024 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 1025 // CHECK1-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to %"class.std::complex.0"* 1026 // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr %"class.std::complex.0", %"class.std::complex.0"* [[TMP12]], i64 1 1027 // CHECK1-NEXT: [[TMP14:%.*]] = bitcast %"class.std::complex.0"* [[TMP13]] to i8* 1028 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast %"class.std::complex.0"* [[TMP12]] to i64* 1029 // CHECK1-NEXT: [[TMP16:%.*]] = bitcast %"class.std::complex.0"* [[DOTOMP_REDUCTION_ELEMENT]] to i64* 1030 // CHECK1-NEXT: br label [[DOTSHUFFLE_PRE_COND:%.*]] 1031 // CHECK1: .shuffle.pre_cond: 1032 // CHECK1-NEXT: [[TMP17:%.*]] = phi i64* [ [[TMP15]], [[ENTRY:%.*]] ], [ [[TMP29:%.*]], [[DOTSHUFFLE_THEN:%.*]] ] 1033 // CHECK1-NEXT: [[TMP18:%.*]] = phi i64* [ [[TMP16]], [[ENTRY]] ], [ [[TMP30:%.*]], [[DOTSHUFFLE_THEN]] ] 1034 // CHECK1-NEXT: [[TMP19:%.*]] = bitcast i64* 
[[TMP17]] to i8* 1035 // CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint i8* [[TMP14]] to i64 1036 // CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint i8* [[TMP19]] to i64 1037 // CHECK1-NEXT: [[TMP22:%.*]] = sub i64 [[TMP20]], [[TMP21]] 1038 // CHECK1-NEXT: [[TMP23:%.*]] = sdiv exact i64 [[TMP22]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) 1039 // CHECK1-NEXT: [[TMP24:%.*]] = icmp sgt i64 [[TMP23]], 7 1040 // CHECK1-NEXT: br i1 [[TMP24]], label [[DOTSHUFFLE_THEN]], label [[DOTSHUFFLE_EXIT:%.*]] 1041 // CHECK1: .shuffle.then: 1042 // CHECK1-NEXT: [[TMP25:%.*]] = load i64, i64* [[TMP17]], align 8 1043 // CHECK1-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_get_warp_size() 1044 // CHECK1-NEXT: [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16 1045 // CHECK1-NEXT: [[TMP28:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP25]], i16 [[TMP7]], i16 [[TMP27]]) 1046 // CHECK1-NEXT: store i64 [[TMP28]], i64* [[TMP18]], align 8 1047 // CHECK1-NEXT: [[TMP29]] = getelementptr i64, i64* [[TMP17]], i64 1 1048 // CHECK1-NEXT: [[TMP30]] = getelementptr i64, i64* [[TMP18]], i64 1 1049 // CHECK1-NEXT: br label [[DOTSHUFFLE_PRE_COND]] 1050 // CHECK1: .shuffle.exit: 1051 // CHECK1-NEXT: [[TMP31:%.*]] = bitcast %"class.std::complex.0"* [[DOTOMP_REDUCTION_ELEMENT]] to i8* 1052 // CHECK1-NEXT: store i8* [[TMP31]], i8** [[TMP11]], align 8, !tbaa [[TBAA12]] 1053 // CHECK1-NEXT: [[TMP32:%.*]] = icmp eq i16 [[TMP8]], 0 1054 // CHECK1-NEXT: [[TMP33:%.*]] = icmp eq i16 [[TMP8]], 1 1055 // CHECK1-NEXT: [[TMP34:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] 1056 // CHECK1-NEXT: [[TMP35:%.*]] = and i1 [[TMP33]], [[TMP34]] 1057 // CHECK1-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 2 1058 // CHECK1-NEXT: [[TMP37:%.*]] = and i16 [[TMP6]], 1 1059 // CHECK1-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP37]], 0 1060 // CHECK1-NEXT: [[TMP39:%.*]] = and i1 [[TMP36]], [[TMP38]] 1061 // CHECK1-NEXT: [[TMP40:%.*]] = icmp sgt i16 [[TMP7]], 0 1062 // CHECK1-NEXT: [[TMP41:%.*]] = and i1 [[TMP39]], [[TMP40]] 1063 // CHECK1-NEXT: [[TMP42:%.*]] = or i1 [[TMP32]], [[TMP35]] 1064 // CHECK1-NEXT: [[TMP43:%.*]] = or i1 [[TMP42]], [[TMP41]] 1065 // CHECK1-NEXT: br i1 [[TMP43]], label [[THEN:%.*]], label [[ELSE:%.*]] 1066 // CHECK1: then: 1067 // CHECK1-NEXT: [[TMP44:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* 1068 // CHECK1-NEXT: [[TMP45:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* 1069 // CHECK1-NEXT: call void @"_omp$reduction$reduction_func4"(i8* [[TMP44]], i8* [[TMP45]]) #[[ATTR5]] 1070 // CHECK1-NEXT: br label [[IFCONT:%.*]] 1071 // CHECK1: else: 1072 // CHECK1-NEXT: br label [[IFCONT]] 1073 // CHECK1: ifcont: 1074 // CHECK1-NEXT: [[TMP46:%.*]] = icmp eq i16 [[TMP8]], 1 1075 // CHECK1-NEXT: [[TMP47:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] 1076 // CHECK1-NEXT: [[TMP48:%.*]] = and i1 [[TMP46]], [[TMP47]] 1077 // CHECK1-NEXT: br i1 [[TMP48]], label [[THEN4:%.*]], label [[ELSE5:%.*]] 1078 // CHECK1: then4: 1079 // CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 1080 // CHECK1-NEXT: [[TMP50:%.*]] = load i8*, i8** [[TMP49]], align 8 1081 // CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 1082 // CHECK1-NEXT: [[TMP52:%.*]] = load i8*, i8** [[TMP51]], align 8 1083 // CHECK1-NEXT: [[TMP53:%.*]] = bitcast i8* [[TMP50]] to %"class.std::complex.0"* 1084 // CHECK1-NEXT: [[TMP54:%.*]] = bitcast i8* [[TMP52]] to %"class.std::complex.0"* 1085 // CHECK1-NEXT: [[TMP55:%.*]] = bitcast %"class.std::complex.0"* [[TMP54]] to i8* 1086 // 
CHECK1-NEXT: [[TMP56:%.*]] = bitcast %"class.std::complex.0"* [[TMP53]] to i8*
// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP55]], i8* align 8 [[TMP56]], i64 16, i1 false), !tbaa.struct !27
// CHECK1-NEXT: br label [[IFCONT6:%.*]]
// CHECK1: else5:
// CHECK1-NEXT: br label [[IFCONT6]]
// CHECK1: ifcont6:
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func6
// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR6]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]]
// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]]
// CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK1-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31
// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
// CHECK1-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5
// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8
// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to [1 x i8*]*
// CHECK1-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]]
// CHECK1-NEXT: br label [[PRECOND:%.*]]
// CHECK1: precond:
// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]]
// CHECK1-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 4
// CHECK1-NEXT: br i1 [[TMP9]], label [[BODY:%.*]], label [[EXIT:%.*]]
// CHECK1: body:
// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]])
// CHECK1-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0
// CHECK1-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]]
// CHECK1: then:
// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i64 0, i64 0
// CHECK1-NEXT: [[TMP11:%.*]] = load i8*, i8** [[TMP10]], align 8, !tbaa [[TBAA12]]
// CHECK1-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP11]] to i32*
// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 [[TMP8]]
// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]]
// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]], align 4
// CHECK1-NEXT: store volatile i32 [[TMP15]], i32 addrspace(3)* [[TMP14]], align 4
// CHECK1-NEXT: br label [[IFCONT:%.*]]
// CHECK1: else:
// CHECK1-NEXT: br label [[IFCONT]]
// CHECK1: ifcont:
// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]])
// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]]
// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP16]]
// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]]
// CHECK1: then2:
// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]]
// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i64 0, i64 0
// CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8, !tbaa [[TBAA12]]
// CHECK1-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to i32*
// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[TMP20]], i32 [[TMP8]]
// CHECK1-NEXT: [[TMP22:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP17]], align 4, !tbaa [[TBAA8]]
// CHECK1-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 4, !tbaa [[TBAA8]]
// CHECK1-NEXT: br label [[IFCONT4:%.*]]
// CHECK1: else3:
// CHECK1-NEXT: br label [[IFCONT4]]
// CHECK1: ifcont4:
// CHECK1-NEXT: [[TMP23:%.*]] = add nsw i32 [[TMP8]], 1
// CHECK1-NEXT: store i32 [[TMP23]], i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]]
// CHECK1-NEXT: br label [[PRECOND]]
// CHECK1: exit:
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper
// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR6]] {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2
// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8
// CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA19]]
// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]]
// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]])
// CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8
// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0
// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32**
// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8, !tbaa [[TBAA12]]
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 1
// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32**
// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8, !tbaa [[TBAA12]]
// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 2
// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to %"class.std::complex.0"**
// CHECK1-NEXT: [[TMP11:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[TMP10]], align 8, !tbaa [[TBAA12]]
// CHECK1-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]], %"class.std::complex.0"* [[TMP11]]) #[[ATTR5]]
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIfEC2ERKfS2_
// CHECK1-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]], float* nonnull align 4 dereferenceable(4) [[__RE:%.*]], float* nonnull align 4 dereferenceable(4) [[__IM:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex"*, align 8
// CHECK1-NEXT: [[__RE_ADDR:%.*]] = alloca float*, align 8
// CHECK1-NEXT: [[__IM_ADDR:%.*]] = alloca float*, align 8
// CHECK1-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]]
// CHECK1-NEXT: store float* [[__RE]], float** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]]
// CHECK1-NEXT: store float* [[__IM]], float** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]]
// CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0
// CHECK1-NEXT: [[TMP0:%.*]] = load float*, float** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]]
// CHECK1-NEXT: [[TMP1:%.*]] = load float, float* [[TMP0]], align 4, !tbaa [[TBAA14]]
// CHECK1-NEXT: store float [[TMP1]], float* [[__RE_]], align 4, !tbaa [[TBAA16]]
// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1
// CHECK1-NEXT: [[TMP2:%.*]] = load float*, float** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]]
// CHECK1-NEXT: [[TMP3:%.*]] = load float, float* [[TMP2]], align 4, !tbaa [[TBAA14]]
// CHECK1-NEXT: store float [[TMP3]], float* [[__IM_]], align 4, !tbaa [[TBAA18]]
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_ZNKSt7complexIfE4realEv
// CHECK1-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]]) #[[ATTR4]] comdat align 2 {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex"*, align 8
// CHECK1-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]]
// CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0
// CHECK1-NEXT: [[TMP0:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA16]]
// CHECK1-NEXT: ret float [[TMP0]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@_ZNKSt7complexIfE4imagEv
// CHECK1-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]]) #[[ATTR4]] comdat align 2 {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex"*, align 8
// CHECK1-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]]
// CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1
// CHECK1-NEXT: [[TMP0:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA18]]
// CHECK1-NEXT: ret float [[TMP0]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIdEC2ERKdS2_
// CHECK1-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]], double* nonnull align 8 dereferenceable(8) [[__RE:%.*]], double* nonnull align 8 dereferenceable(8) [[__IM:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8
// CHECK1-NEXT: [[__RE_ADDR:%.*]] = alloca double*, align 8
// CHECK1-NEXT: [[__IM_ADDR:%.*]] = alloca double*, align 8
// CHECK1-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]]
// CHECK1-NEXT: store double* [[__RE]], double** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]]
// CHECK1-NEXT: store double* [[__IM]], double** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]]
// CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 0
// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]]
// CHECK1-NEXT: [[TMP1:%.*]] = load double, double* [[TMP0]], align 8, !tbaa [[TBAA22]]
// CHECK1-NEXT: store double [[TMP1]], double* [[__RE_]], align 8, !tbaa [[TBAA24]]
// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 1
// CHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]]
// CHECK1-NEXT: [[TMP3:%.*]] = load double, double* [[TMP2]], align 8, !tbaa [[TBAA22]]
// CHECK1-NEXT: store double [[TMP3]], double* [[__IM_]], align 8, !tbaa [[TBAA26]]
// CHECK1-NEXT: ret void
//
//
// CHECK1-LABEL: define {{[^@]+}}@_ZNKSt7complexIdE4realEv
// CHECK1-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR4]] comdat align 2 {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8
// CHECK1-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]]
// CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 0
// CHECK1-NEXT: [[TMP0:%.*]] = load double, double* [[__RE_]], align 8, !tbaa [[TBAA24]]
// CHECK1-NEXT: ret double [[TMP0]]
//
//
// CHECK1-LABEL: define {{[^@]+}}@_ZNKSt7complexIdE4imagEv
// CHECK1-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR4]] comdat align 2 {
// CHECK1-NEXT: entry:
// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8
// CHECK1-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]]
// CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8
// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 1
// CHECK1-NEXT: [[TMP0:%.*]] = load double, double* [[__IM_]], align 8, !tbaa [[TBAA26]]
// CHECK1-NEXT: ret double [[TMP0]]
//
//
// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16
// CHECK2-SAME: () #[[ATTR0:[0-9]+]] {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4
// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true)
// CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1
// CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
// CHECK2: user_code.entry:
// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4
// CHECK2-NEXT: store i32
[[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8:![0-9]+]] 1279 // CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]] 1280 // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 1281 // CHECK2-NEXT: ret void 1282 // CHECK2: worker.exit: 1283 // CHECK2-NEXT: ret void 1284 // 1285 // 1286 // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ 1287 // CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1288 // CHECK2-NEXT: entry: 1289 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 1290 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 1291 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 1292 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 1293 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 1294 // CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 1295 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 1296 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 1297 // CHECK2-NEXT: [[IB:%.*]] = alloca i32, align 4 1298 // CHECK2-NEXT: [[REF_TMP:%.*]] = alloca float, align 4 1299 // CHECK2-NEXT: [[REF_TMP2:%.*]] = alloca float, align 4 1300 // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 1301 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12:![0-9]+]] 1302 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] 1303 // CHECK2-NEXT: [[ISTART:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 4) 1304 // CHECK2-NEXT: [[ISTART_ON_STACK:%.*]] = bitcast i8* [[ISTART]] to i32* 1305 // CHECK2-NEXT: [[IEND:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 4) 1306 // CHECK2-NEXT: [[IEND_ON_STACK:%.*]] = bitcast i8* [[IEND]] to i32* 1307 // CHECK2-NEXT: [[PARTIAL_SUM:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 8) 1308 // CHECK2-NEXT: [[PARTIAL_SUM_ON_STACK:%.*]] = bitcast i8* [[PARTIAL_SUM]] to %"class.std::complex"* 1309 // CHECK2-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* 1310 // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) #[[ATTR5]] 1311 // CHECK2-NEXT: [[TMP1:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* 1312 // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP1]]) #[[ATTR5]] 1313 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 1314 // CHECK2-NEXT: [[TMP2:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* 1315 // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP2]]) #[[ATTR5]] 1316 // CHECK2-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 1317 // CHECK2-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* 1318 // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR5]] 1319 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] 1320 // CHECK2-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* 1321 // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR5]] 1322 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] 1323 // CHECK2-NEXT: [[TMP5:%.*]] = bitcast i32* [[IB]] to i8* 1324 // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP5]]) #[[ATTR5]] 1325 // CHECK2-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 1326 // CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA8]] 1327 // 
CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 1328 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 1329 // CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 99 1330 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 1331 // CHECK2: cond.true: 1332 // CHECK2-NEXT: br label [[COND_END:%.*]] 1333 // CHECK2: cond.false: 1334 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 1335 // CHECK2-NEXT: br label [[COND_END]] 1336 // CHECK2: cond.end: 1337 // CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] 1338 // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 1339 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 1340 // CHECK2-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 1341 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 1342 // CHECK2: omp.inner.for.cond: 1343 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 1344 // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 1345 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] 1346 // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] 1347 // CHECK2: omp.inner.for.cond.cleanup: 1348 // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] 1349 // CHECK2: omp.inner.for.body: 1350 // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 1351 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 1352 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 1353 // CHECK2-NEXT: store i32 [[ADD]], i32* [[IB]], align 4, !tbaa [[TBAA8]] 1354 // CHECK2-NEXT: [[TMP14:%.*]] = bitcast float* [[REF_TMP]] to i8* 1355 // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP14]]) #[[ATTR5]] 1356 // CHECK2-NEXT: store float 0.000000e+00, float* [[REF_TMP]], align 4, !tbaa [[TBAA14:![0-9]+]] 1357 // CHECK2-NEXT: [[TMP15:%.*]] = bitcast float* [[REF_TMP2]] to i8* 1358 // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP15]]) #[[ATTR5]] 1359 // CHECK2-NEXT: store float 0.000000e+00, float* [[REF_TMP2]], align 4, !tbaa [[TBAA14]] 1360 // CHECK2-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM_ON_STACK]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR10:[0-9]+]] 1361 // CHECK2-NEXT: [[TMP16:%.*]] = bitcast float* [[REF_TMP2]] to i8* 1362 // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR5]] 1363 // CHECK2-NEXT: [[TMP17:%.*]] = bitcast float* [[REF_TMP]] to i8* 1364 // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR5]] 1365 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] 1366 // CHECK2-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP18]], 4 1367 // CHECK2-NEXT: store i32 [[MUL3]], i32* [[ISTART_ON_STACK]], align 4, !tbaa [[TBAA8]] 1368 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] 1369 // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 1370 // CHECK2-NEXT: [[MUL5:%.*]] = mul nsw i32 [[ADD4]], 4 1371 // CHECK2-NEXT: store i32 [[MUL5]], 
i32* [[IEND_ON_STACK]], align 4, !tbaa [[TBAA8]] 1372 // CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 1373 // CHECK2-NEXT: [[TMP21:%.*]] = bitcast i32* [[ISTART_ON_STACK]] to i8* 1374 // CHECK2-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8, !tbaa [[TBAA12]] 1375 // CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 1376 // CHECK2-NEXT: [[TMP23:%.*]] = bitcast i32* [[IEND_ON_STACK]] to i8* 1377 // CHECK2-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8, !tbaa [[TBAA12]] 1378 // CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 1379 // CHECK2-NEXT: [[TMP25:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM_ON_STACK]] to i8* 1380 // CHECK2-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8, !tbaa [[TBAA12]] 1381 // CHECK2-NEXT: [[TMP26:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 1382 // CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i32*, %"class.std::complex"*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP26]], i64 3) 1383 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 1384 // CHECK2: omp.body.continue: 1385 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 1386 // CHECK2: omp.inner.for.inc: 1387 // CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 1388 // CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP27]], 1 1389 // CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 1390 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] 1391 // CHECK2: omp.inner.for.end: 1392 // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 1393 // CHECK2: omp.loop.exit: 1394 // CHECK2-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]]) 1395 // CHECK2-NEXT: [[TMP28:%.*]] = bitcast i32* [[IB]] to i8* 1396 // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP28]]) #[[ATTR5]] 1397 // CHECK2-NEXT: [[TMP29:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* 1398 // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP29]]) #[[ATTR5]] 1399 // CHECK2-NEXT: [[TMP30:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* 1400 // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP30]]) #[[ATTR5]] 1401 // CHECK2-NEXT: [[TMP31:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* 1402 // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP31]]) #[[ATTR5]] 1403 // CHECK2-NEXT: [[TMP32:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* 1404 // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP32]]) #[[ATTR5]] 1405 // CHECK2-NEXT: [[TMP33:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* 1406 // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR5]] 1407 // CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[PARTIAL_SUM]], i64 8) 1408 // CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[IEND]], i64 4) 1409 // CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[ISTART]], i64 4) 1410 // CHECK2-NEXT: ret void 1411 // 1412 // 1413 // CHECK2-LABEL: define {{[^@]+}}@_ZNSt7complexIfEC1ERKfS2_ 1414 // CHECK2-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]], float* nonnull align 4 dereferenceable(4) [[__RE:%.*]], float* nonnull align 4 dereferenceable(4) [[__IM:%.*]]) unnamed_addr #[[ATTR3:[0-9]+]] comdat align 2 { 1415 // 
CHECK2-NEXT: entry: 1416 // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 1417 // CHECK2-NEXT: [[__RE_ADDR:%.*]] = alloca float*, align 8 1418 // CHECK2-NEXT: [[__IM_ADDR:%.*]] = alloca float*, align 8 1419 // CHECK2-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] 1420 // CHECK2-NEXT: store float* [[__RE]], float** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] 1421 // CHECK2-NEXT: store float* [[__IM]], float** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] 1422 // CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 1423 // CHECK2-NEXT: [[TMP0:%.*]] = load float*, float** [[__RE_ADDR]], align 8 1424 // CHECK2-NEXT: [[TMP1:%.*]] = load float*, float** [[__IM_ADDR]], align 8 1425 // CHECK2-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR10]] 1426 // CHECK2-NEXT: ret void 1427 // 1428 // 1429 // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 1430 // CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ISTART:%.*]], i32* nonnull align 4 dereferenceable(4) [[IEND:%.*]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM:%.*]]) #[[ATTR0]] { 1431 // CHECK2-NEXT: entry: 1432 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 1433 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 1434 // CHECK2-NEXT: [[ISTART_ADDR:%.*]] = alloca i32*, align 8 1435 // CHECK2-NEXT: [[IEND_ADDR:%.*]] = alloca i32*, align 8 1436 // CHECK2-NEXT: [[PARTIAL_SUM_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 1437 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 1438 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 1439 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 1440 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 1441 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 1442 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 1443 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 1444 // CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 1445 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 1446 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 1447 // CHECK2-NEXT: [[PARTIAL_SUM5:%.*]] = alloca %"class.std::complex", align 4 1448 // CHECK2-NEXT: [[REF_TMP:%.*]] = alloca float, align 4 1449 // CHECK2-NEXT: [[REF_TMP6:%.*]] = alloca float, align 4 1450 // CHECK2-NEXT: [[I7:%.*]] = alloca i32, align 4 1451 // CHECK2-NEXT: [[REF_TMP14:%.*]] = alloca %"class.std::complex", align 4 1452 // CHECK2-NEXT: [[REF_TMP15:%.*]] = alloca float, align 4 1453 // CHECK2-NEXT: [[REF_TMP16:%.*]] = alloca float, align 4 1454 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 1455 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12]] 1456 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] 1457 // CHECK2-NEXT: store i32* [[ISTART]], i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] 1458 // CHECK2-NEXT: store i32* [[IEND]], i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] 1459 // CHECK2-NEXT: store %"class.std::complex"* [[PARTIAL_SUM]], %"class.std::complex"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] 
// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]]
// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]]
// CHECK2-NEXT: [[TMP2:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]]
// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8*
// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR5]]
// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8*
// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR5]]
// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8*
// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR5]]
// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8*
// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR5]]
// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: [[SUB:%.*]] = sub i32 [[TMP9]], [[TMP10]]
// CHECK2-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1
// CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1
// CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1
// CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1
// CHECK2-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: [[TMP11:%.*]] = bitcast i32* [[I]] to i8*
// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP11]]) #[[ATTR5]]
// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: store i32 [[TMP12]], i32* [[I]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: [[TMP13:%.*]] = bitcast i32* [[I]] to i8*
// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP13]]) #[[ATTR5]]
// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP14]], [[TMP15]]
// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
// CHECK2: omp.precond.then:
// CHECK2-NEXT: [[TMP16:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8*
// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR5]]
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: [[TMP17:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8*
// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR5]]
// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: [[TMP19:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8*
// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP19]]) #[[ATTR5]]
// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8*
// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR5]]
// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: [[TMP21:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM5]] to i8*
// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP21]]) #[[ATTR5]]
// CHECK2-NEXT: [[TMP22:%.*]] = bitcast float* [[REF_TMP]] to i8*
// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP22]]) #[[ATTR5]]
// CHECK2-NEXT: store float 0.000000e+00, float* [[REF_TMP]], align 4, !tbaa [[TBAA14]]
// CHECK2-NEXT: [[TMP23:%.*]] = bitcast float* [[REF_TMP6]] to i8*
// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP23]]) #[[ATTR5]]
// CHECK2-NEXT: store float 0.000000e+00, float* [[REF_TMP6]], align 4, !tbaa [[TBAA14]]
// CHECK2-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR10]]
// CHECK2-NEXT: [[TMP24:%.*]] = bitcast float* [[REF_TMP6]] to i8*
// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP24]]) #[[ATTR5]]
// CHECK2-NEXT: [[TMP25:%.*]] = bitcast float* [[REF_TMP]] to i8*
// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP25]]) #[[ATTR5]]
// CHECK2-NEXT: [[TMP26:%.*]] = bitcast i32* [[I7]] to i8*
// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP26]]) #[[ATTR5]]
// CHECK2-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP28]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1)
// CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]]
// CHECK2: omp.dispatch.cond:
// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: [[CMP8:%.*]] = icmp ugt i32 [[TMP29]], [[TMP30]]
// CHECK2-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
// CHECK2: cond.true:
// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: br label [[COND_END:%.*]]
// CHECK2: cond.false:
// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: br label [[COND_END]]
// CHECK2: cond.end:
// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE]] ], [ [[TMP32]], [[COND_FALSE]] ]
// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: [[ADD9:%.*]] = add i32 [[TMP35]], 1
// CHECK2-NEXT: [[CMP10:%.*]] = icmp ult i32 [[TMP34]], [[ADD9]]
// CHECK2-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]]
// CHECK2: omp.dispatch.cleanup:
// CHECK2-NEXT: br label [[OMP_DISPATCH_END:%.*]]
// CHECK2: omp.dispatch.body:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
// CHECK2: omp.inner.for.cond:
// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: [[ADD11:%.*]] = add i32 [[TMP37]], 1
// CHECK2-NEXT: [[CMP12:%.*]] = icmp ult i32 [[TMP36]], [[ADD11]]
// CHECK2-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]]
// CHECK2: omp.inner.for.cond.cleanup:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]]
// CHECK2: omp.inner.for.body:
// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP39]], 1
// CHECK2-NEXT: [[ADD13:%.*]] = add i32 [[TMP38]], [[MUL]]
// CHECK2-NEXT: store i32 [[ADD13]], i32* [[I7]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: [[TMP40:%.*]] = bitcast %"class.std::complex"* [[REF_TMP14]] to i8*
// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP40]]) #[[ATTR5]]
// CHECK2-NEXT: [[TMP41:%.*]] = bitcast float* [[REF_TMP15]] to i8*
// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP41]]) #[[ATTR5]]
// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP42]] to float
// CHECK2-NEXT: store float [[CONV]], float* [[REF_TMP15]], align 4, !tbaa [[TBAA14]]
// CHECK2-NEXT: [[TMP43:%.*]] = bitcast float* [[REF_TMP16]] to i8*
// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP43]]) #[[ATTR5]]
// CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP44]] to float
// CHECK2-NEXT: store float [[CONV17]], float* [[REF_TMP16]], align 4, !tbaa [[TBAA14]]
// CHECK2-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]], float* nonnull align 4 dereferenceable(4) [[REF_TMP15]], float* nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR10]]
// CHECK2-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR10]]
// CHECK2-NEXT: [[TMP45:%.*]] = bitcast float* [[REF_TMP16]] to i8*
// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP45]]) #[[ATTR5]]
// CHECK2-NEXT: [[TMP46:%.*]] = bitcast float* [[REF_TMP15]] to i8*
// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP46]]) #[[ATTR5]]
// CHECK2-NEXT: [[TMP47:%.*]] = bitcast %"class.std::complex"* [[REF_TMP14]] to i8*
// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP47]]) #[[ATTR5]]
// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
// CHECK2: omp.body.continue:
// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
// CHECK2: omp.inner.for.inc:
// CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: [[ADD18:%.*]] = add i32 [[TMP48]], 1
// CHECK2-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]]
// CHECK2: omp.inner.for.end:
// CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]]
// CHECK2: omp.dispatch.inc:
// CHECK2-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: [[ADD19:%.*]] = add i32 [[TMP49]], [[TMP50]]
// CHECK2-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: [[ADD20:%.*]] = add i32 [[TMP51]], [[TMP52]]
// CHECK2-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: br label [[OMP_DISPATCH_COND]]
// CHECK2: omp.dispatch.end:
// CHECK2-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP54]])
// CHECK2-NEXT: [[TMP55:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
// CHECK2-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4, !tbaa [[TBAA8]]
// CHECK2-NEXT: [[TMP57:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0
// CHECK2-NEXT: [[TMP58:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM5]] to i8*
// CHECK2-NEXT: store i8* [[TMP58]], i8** [[TMP57]], align 8
// CHECK2-NEXT: [[TMP59:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8*
// CHECK2-NEXT: [[TMP60:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP56]], i32 1, i64 8, i8* [[TMP59]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func)
// CHECK2-NEXT: [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1
// CHECK2-NEXT: br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]]
// CHECK2: .omp.reduction.then:
// CHECK2-NEXT: [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR10]]
// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]])
// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]]
// CHECK2: .omp.reduction.done:
// CHECK2-NEXT: [[TMP62:%.*]] = bitcast i32* [[I7]] to i8*
// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR5]]
// CHECK2-NEXT: [[TMP63:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM5]] to i8*
// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP63]]) #[[ATTR5]]
// CHECK2-NEXT: [[TMP64:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8*
// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR5]]
// CHECK2-NEXT: [[TMP65:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8*
// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR5]]
// CHECK2-NEXT: [[TMP66:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8*
// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR5]]
// CHECK2-NEXT: [[TMP67:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8*
// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR5]]
// CHECK2-NEXT: br label [[OMP_PRECOND_END]]
// CHECK2: omp.precond.end:
// CHECK2-NEXT: [[TMP68:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8*
// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR5]]
// CHECK2-NEXT: [[TMP69:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8*
// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR5]]
// CHECK2-NEXT: [[TMP70:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8*
// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR5]]
// CHECK2-NEXT: [[TMP71:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8*
// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR5]]
// CHECK2-NEXT: ret void
//
//
// CHECK2-LABEL: define {{[^@]+}}@_ZNSt7complexIfEpLIfEERS0_RKS_IT_E
// CHECK2-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[__C:%.*]]) #[[ATTR4:[0-9]+]] comdat align 2 {
// CHECK2-NEXT: entry:
// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex"*, align 8
// CHECK2-NEXT: [[__C_ADDR:%.*]] = alloca %"class.std::complex"*, align 8
// CHECK2-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]]
// CHECK2-NEXT: store %"class.std::complex"* [[__C]], %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]]
// CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8
// CHECK2-NEXT: [[TMP0:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]]
// CHECK2-NEXT: [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP0]]) #[[ATTR10]]
// CHECK2-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0
// CHECK2-NEXT: [[TMP1:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA16:![0-9]+]]
// CHECK2-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[CALL]]
// CHECK2-NEXT: store float [[ADD]], float* [[__RE_]], align 4, !tbaa [[TBAA16]]
// CHECK2-NEXT: [[TMP2:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]]
// CHECK2-NEXT: [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]]) #[[ATTR10]]
// CHECK2-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1
// CHECK2-NEXT: [[TMP3:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA18:![0-9]+]]
// CHECK2-NEXT: [[ADD3:%.*]] = fadd float [[TMP3]], [[CALL2]]
// CHECK2-NEXT: store float [[ADD3]], float* [[__IM_]], align 4, !tbaa [[TBAA18]]
// CHECK2-NEXT: ret %"class.std::complex"* [[THIS1]]
//
//
// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func
// CHECK2-SAME: (i8*
[[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR6:[0-9]+]] { 1673 // CHECK2-NEXT: entry: 1674 // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 1675 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 1676 // CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 1677 // CHECK2-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 1678 // CHECK2-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x i8*], align 8 1679 // CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca %"class.std::complex", align 4 1680 // CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] 1681 // CHECK2-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2, !tbaa [[TBAA19:![0-9]+]] 1682 // CHECK2-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2, !tbaa [[TBAA19]] 1683 // CHECK2-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2, !tbaa [[TBAA19]] 1684 // CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] 1685 // CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]* 1686 // CHECK2-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2, !tbaa [[TBAA19]] 1687 // CHECK2-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2, !tbaa [[TBAA19]] 1688 // CHECK2-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2, !tbaa [[TBAA19]] 1689 // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 1690 // CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 1691 // CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 1692 // CHECK2-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to %"class.std::complex"* 1693 // CHECK2-NEXT: [[TMP13:%.*]] = getelementptr %"class.std::complex", %"class.std::complex"* [[TMP12]], i64 1 1694 // CHECK2-NEXT: [[TMP14:%.*]] = bitcast %"class.std::complex"* [[TMP13]] to i8* 1695 // CHECK2-NEXT: [[TMP15:%.*]] = bitcast %"class.std::complex"* [[TMP12]] to i64* 1696 // CHECK2-NEXT: [[TMP16:%.*]] = bitcast %"class.std::complex"* [[DOTOMP_REDUCTION_ELEMENT]] to i64* 1697 // CHECK2-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP15]], align 4 1698 // CHECK2-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_get_warp_size() 1699 // CHECK2-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16 1700 // CHECK2-NEXT: [[TMP20:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP19]]) 1701 // CHECK2-NEXT: store i64 [[TMP20]], i64* [[TMP16]], align 4 1702 // CHECK2-NEXT: [[TMP21:%.*]] = getelementptr i64, i64* [[TMP15]], i64 1 1703 // CHECK2-NEXT: [[TMP22:%.*]] = getelementptr i64, i64* [[TMP16]], i64 1 1704 // CHECK2-NEXT: [[TMP23:%.*]] = bitcast %"class.std::complex"* [[DOTOMP_REDUCTION_ELEMENT]] to i8* 1705 // CHECK2-NEXT: store i8* [[TMP23]], i8** [[TMP11]], align 8, !tbaa [[TBAA12]] 1706 // CHECK2-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 0 1707 // CHECK2-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP8]], 1 1708 // CHECK2-NEXT: [[TMP26:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] 1709 // CHECK2-NEXT: [[TMP27:%.*]] = and i1 [[TMP25]], [[TMP26]] 1710 // CHECK2-NEXT: [[TMP28:%.*]] = icmp eq i16 [[TMP8]], 2 1711 // CHECK2-NEXT: [[TMP29:%.*]] = and i16 [[TMP6]], 1 1712 // CHECK2-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP29]], 0 1713 // CHECK2-NEXT: [[TMP31:%.*]] = and i1 [[TMP28]], [[TMP30]] 1714 // CHECK2-NEXT: [[TMP32:%.*]] = icmp sgt i16 [[TMP7]], 0 1715 // CHECK2-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] 1716 // CHECK2-NEXT: [[TMP34:%.*]] = or i1 [[TMP24]], [[TMP27]] 
1717 // CHECK2-NEXT: [[TMP35:%.*]] = or i1 [[TMP34]], [[TMP33]] 1718 // CHECK2-NEXT: br i1 [[TMP35]], label [[THEN:%.*]], label [[ELSE:%.*]] 1719 // CHECK2: then: 1720 // CHECK2-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* 1721 // CHECK2-NEXT: [[TMP37:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* 1722 // CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP36]], i8* [[TMP37]]) #[[ATTR5]] 1723 // CHECK2-NEXT: br label [[IFCONT:%.*]] 1724 // CHECK2: else: 1725 // CHECK2-NEXT: br label [[IFCONT]] 1726 // CHECK2: ifcont: 1727 // CHECK2-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 1728 // CHECK2-NEXT: [[TMP39:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] 1729 // CHECK2-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] 1730 // CHECK2-NEXT: br i1 [[TMP40]], label [[THEN4:%.*]], label [[ELSE5:%.*]] 1731 // CHECK2: then4: 1732 // CHECK2-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 1733 // CHECK2-NEXT: [[TMP42:%.*]] = load i8*, i8** [[TMP41]], align 8 1734 // CHECK2-NEXT: [[TMP43:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 1735 // CHECK2-NEXT: [[TMP44:%.*]] = load i8*, i8** [[TMP43]], align 8 1736 // CHECK2-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP42]] to %"class.std::complex"* 1737 // CHECK2-NEXT: [[TMP46:%.*]] = bitcast i8* [[TMP44]] to %"class.std::complex"* 1738 // CHECK2-NEXT: [[TMP47:%.*]] = bitcast %"class.std::complex"* [[TMP46]] to i8* 1739 // CHECK2-NEXT: [[TMP48:%.*]] = bitcast %"class.std::complex"* [[TMP45]] to i8* 1740 // CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP47]], i8* align 4 [[TMP48]], i64 8, i1 false), !tbaa.struct !21 1741 // CHECK2-NEXT: br label [[IFCONT6:%.*]] 1742 // CHECK2: else5: 1743 // CHECK2-NEXT: br label [[IFCONT6]] 1744 // CHECK2: ifcont6: 1745 // CHECK2-NEXT: ret void 1746 // 1747 // 1748 // CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func 1749 // CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR6]] { 1750 // CHECK2-NEXT: entry: 1751 // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 1752 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1753 // CHECK2-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4 1754 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 1755 // CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] 1756 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] 1757 // CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 1758 // CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 1759 // CHECK2-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31 1760 // CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 1761 // CHECK2-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5 1762 // CHECK2-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8 1763 // CHECK2-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to [1 x i8*]* 1764 // CHECK2-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] 1765 // CHECK2-NEXT: br label [[PRECOND:%.*]] 1766 // CHECK2: precond: 1767 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] 1768 // CHECK2-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 2 1769 // CHECK2-NEXT: br i1 [[TMP9]], label [[BODY:%.*]], label [[EXIT:%.*]] 1770 // CHECK2: body: 1771 // CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* 
@[[GLOB4:[0-9]+]], i32 [[TMP2]]) 1772 // CHECK2-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 1773 // CHECK2-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] 1774 // CHECK2: then: 1775 // CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i64 0, i64 0 1776 // CHECK2-NEXT: [[TMP11:%.*]] = load i8*, i8** [[TMP10]], align 8, !tbaa [[TBAA12]] 1777 // CHECK2-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP11]] to i32* 1778 // CHECK2-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 [[TMP8]] 1779 // CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] 1780 // CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]], align 4 1781 // CHECK2-NEXT: store volatile i32 [[TMP15]], i32 addrspace(3)* [[TMP14]], align 4 1782 // CHECK2-NEXT: br label [[IFCONT:%.*]] 1783 // CHECK2: else: 1784 // CHECK2-NEXT: br label [[IFCONT]] 1785 // CHECK2: ifcont: 1786 // CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) 1787 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] 1788 // CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP16]] 1789 // CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] 1790 // CHECK2: then2: 1791 // CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] 1792 // CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i64 0, i64 0 1793 // CHECK2-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8, !tbaa [[TBAA12]] 1794 // CHECK2-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to i32* 1795 // CHECK2-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[TMP20]], i32 [[TMP8]] 1796 // CHECK2-NEXT: [[TMP22:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP17]], align 4, !tbaa [[TBAA8]] 1797 // CHECK2-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 4, !tbaa [[TBAA8]] 1798 // CHECK2-NEXT: br label [[IFCONT4:%.*]] 1799 // CHECK2: else3: 1800 // CHECK2-NEXT: br label [[IFCONT4]] 1801 // CHECK2: ifcont4: 1802 // CHECK2-NEXT: [[TMP23:%.*]] = add nsw i32 [[TMP8]], 1 1803 // CHECK2-NEXT: store i32 [[TMP23]], i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] 1804 // CHECK2-NEXT: br label [[PRECOND]] 1805 // CHECK2: exit: 1806 // CHECK2-NEXT: ret void 1807 // 1808 // 1809 // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper 1810 // CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR6]] { 1811 // CHECK2-NEXT: entry: 1812 // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 1813 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1814 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1815 // CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 1816 // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA19]] 1817 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] 1818 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 1819 // CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 1820 // CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 1821 // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 1822 // CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** 1823 // CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8, !tbaa 
[[TBAA12]] 1824 // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 1 1825 // CHECK2-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32** 1826 // CHECK2-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8, !tbaa [[TBAA12]] 1827 // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 2 1828 // CHECK2-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to %"class.std::complex"** 1829 // CHECK2-NEXT: [[TMP11:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[TMP10]], align 8, !tbaa [[TBAA12]] 1830 // CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]], %"class.std::complex"* [[TMP11]]) #[[ATTR5]] 1831 // CHECK2-NEXT: ret void 1832 // 1833 // 1834 // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16 1835 // CHECK2-SAME: () #[[ATTR0]] { 1836 // CHECK2-NEXT: entry: 1837 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1838 // CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 1839 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) 1840 // CHECK2-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 1841 // CHECK2-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 1842 // CHECK2: user_code.entry: 1843 // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 1844 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 1845 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8]] 1846 // CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR5]] 1847 // CHECK2-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 1848 // CHECK2-NEXT: ret void 1849 // CHECK2: worker.exit: 1850 // CHECK2-NEXT: ret void 1851 // 1852 // 1853 // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__2 1854 // CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 1855 // CHECK2-NEXT: entry: 1856 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 1857 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 1858 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 1859 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 1860 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 1861 // CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 1862 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 1863 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 1864 // CHECK2-NEXT: [[IB:%.*]] = alloca i32, align 4 1865 // CHECK2-NEXT: [[REF_TMP:%.*]] = alloca double, align 8 1866 // CHECK2-NEXT: [[REF_TMP2:%.*]] = alloca double, align 8 1867 // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 1868 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12]] 1869 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] 1870 // CHECK2-NEXT: [[ISTART:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 4) 1871 // CHECK2-NEXT: [[ISTART_ON_STACK:%.*]] = bitcast i8* [[ISTART]] to i32* 1872 // CHECK2-NEXT: [[IEND:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 4) 1873 // CHECK2-NEXT: [[IEND_ON_STACK:%.*]] = bitcast i8* [[IEND]] to i32* 1874 // CHECK2-NEXT: [[PARTIAL_SUM:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 
16) 1875 // CHECK2-NEXT: [[PARTIAL_SUM_ON_STACK:%.*]] = bitcast i8* [[PARTIAL_SUM]] to %"class.std::complex.0"* 1876 // CHECK2-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* 1877 // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) #[[ATTR5]] 1878 // CHECK2-NEXT: [[TMP1:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* 1879 // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP1]]) #[[ATTR5]] 1880 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 1881 // CHECK2-NEXT: [[TMP2:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* 1882 // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP2]]) #[[ATTR5]] 1883 // CHECK2-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 1884 // CHECK2-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* 1885 // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR5]] 1886 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] 1887 // CHECK2-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* 1888 // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR5]] 1889 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] 1890 // CHECK2-NEXT: [[TMP5:%.*]] = bitcast i32* [[IB]] to i8* 1891 // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP5]]) #[[ATTR5]] 1892 // CHECK2-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 1893 // CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA8]] 1894 // CHECK2-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 1895 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 1896 // CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 99 1897 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 1898 // CHECK2: cond.true: 1899 // CHECK2-NEXT: br label [[COND_END:%.*]] 1900 // CHECK2: cond.false: 1901 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 1902 // CHECK2-NEXT: br label [[COND_END]] 1903 // CHECK2: cond.end: 1904 // CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] 1905 // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 1906 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 1907 // CHECK2-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 1908 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 1909 // CHECK2: omp.inner.for.cond: 1910 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 1911 // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 1912 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] 1913 // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] 1914 // CHECK2: omp.inner.for.cond.cleanup: 1915 // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] 1916 // CHECK2: omp.inner.for.body: 1917 // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 1918 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 1919 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 1920 // CHECK2-NEXT: store i32 [[ADD]], i32* [[IB]], align 4, !tbaa [[TBAA8]] 1921 // CHECK2-NEXT: 
[[TMP14:%.*]] = bitcast double* [[REF_TMP]] to i8* 1922 // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP14]]) #[[ATTR5]] 1923 // CHECK2-NEXT: store double 0.000000e+00, double* [[REF_TMP]], align 8, !tbaa [[TBAA22:![0-9]+]] 1924 // CHECK2-NEXT: [[TMP15:%.*]] = bitcast double* [[REF_TMP2]] to i8* 1925 // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP15]]) #[[ATTR5]] 1926 // CHECK2-NEXT: store double 0.000000e+00, double* [[REF_TMP2]], align 8, !tbaa [[TBAA22]] 1927 // CHECK2-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM_ON_STACK]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR10]] 1928 // CHECK2-NEXT: [[TMP16:%.*]] = bitcast double* [[REF_TMP2]] to i8* 1929 // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP16]]) #[[ATTR5]] 1930 // CHECK2-NEXT: [[TMP17:%.*]] = bitcast double* [[REF_TMP]] to i8* 1931 // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP17]]) #[[ATTR5]] 1932 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] 1933 // CHECK2-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP18]], 4 1934 // CHECK2-NEXT: store i32 [[MUL3]], i32* [[ISTART_ON_STACK]], align 4, !tbaa [[TBAA8]] 1935 // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] 1936 // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 1937 // CHECK2-NEXT: [[MUL5:%.*]] = mul nsw i32 [[ADD4]], 4 1938 // CHECK2-NEXT: store i32 [[MUL5]], i32* [[IEND_ON_STACK]], align 4, !tbaa [[TBAA8]] 1939 // CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 1940 // CHECK2-NEXT: [[TMP21:%.*]] = bitcast i32* [[ISTART_ON_STACK]] to i8* 1941 // CHECK2-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8, !tbaa [[TBAA12]] 1942 // CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 1943 // CHECK2-NEXT: [[TMP23:%.*]] = bitcast i32* [[IEND_ON_STACK]] to i8* 1944 // CHECK2-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8, !tbaa [[TBAA12]] 1945 // CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 1946 // CHECK2-NEXT: [[TMP25:%.*]] = bitcast %"class.std::complex.0"* [[PARTIAL_SUM_ON_STACK]] to i8* 1947 // CHECK2-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8, !tbaa [[TBAA12]] 1948 // CHECK2-NEXT: [[TMP26:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 1949 // CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i32*, %"class.std::complex.0"*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP26]], i64 3) 1950 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 1951 // CHECK2: omp.body.continue: 1952 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 1953 // CHECK2: omp.inner.for.inc: 1954 // CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 1955 // CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP27]], 1 1956 // CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 1957 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] 1958 // CHECK2: omp.inner.for.end: 1959 // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 1960 // CHECK2: omp.loop.exit: 1961 // CHECK2-NEXT: call void 
@__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]]) 1962 // CHECK2-NEXT: [[TMP28:%.*]] = bitcast i32* [[IB]] to i8* 1963 // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP28]]) #[[ATTR5]] 1964 // CHECK2-NEXT: [[TMP29:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* 1965 // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP29]]) #[[ATTR5]] 1966 // CHECK2-NEXT: [[TMP30:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* 1967 // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP30]]) #[[ATTR5]] 1968 // CHECK2-NEXT: [[TMP31:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* 1969 // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP31]]) #[[ATTR5]] 1970 // CHECK2-NEXT: [[TMP32:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* 1971 // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP32]]) #[[ATTR5]] 1972 // CHECK2-NEXT: [[TMP33:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* 1973 // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR5]] 1974 // CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[PARTIAL_SUM]], i64 16) 1975 // CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[IEND]], i64 4) 1976 // CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[ISTART]], i64 4) 1977 // CHECK2-NEXT: ret void 1978 // 1979 // 1980 // CHECK2-LABEL: define {{[^@]+}}@_ZNSt7complexIdEC1ERKdS2_ 1981 // CHECK2-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]], double* nonnull align 8 dereferenceable(8) [[__RE:%.*]], double* nonnull align 8 dereferenceable(8) [[__IM:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 { 1982 // CHECK2-NEXT: entry: 1983 // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 1984 // CHECK2-NEXT: [[__RE_ADDR:%.*]] = alloca double*, align 8 1985 // CHECK2-NEXT: [[__IM_ADDR:%.*]] = alloca double*, align 8 1986 // CHECK2-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] 1987 // CHECK2-NEXT: store double* [[__RE]], double** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] 1988 // CHECK2-NEXT: store double* [[__IM]], double** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] 1989 // CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 1990 // CHECK2-NEXT: [[TMP0:%.*]] = load double*, double** [[__RE_ADDR]], align 8 1991 // CHECK2-NEXT: [[TMP1:%.*]] = load double*, double** [[__IM_ADDR]], align 8 1992 // CHECK2-NEXT: call void @_ZNSt7complexIdEC2ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS1]], double* nonnull align 8 dereferenceable(8) [[TMP0]], double* nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR10]] 1993 // CHECK2-NEXT: ret void 1994 // 1995 // 1996 // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__3 1997 // CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ISTART:%.*]], i32* nonnull align 4 dereferenceable(4) [[IEND:%.*]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM:%.*]]) #[[ATTR0]] { 1998 // CHECK2-NEXT: entry: 1999 // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 2000 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 2001 // CHECK2-NEXT: [[ISTART_ADDR:%.*]] = alloca i32*, align 8 2002 // CHECK2-NEXT: [[IEND_ADDR:%.*]] = alloca i32*, align 8 2003 // CHECK2-NEXT: [[PARTIAL_SUM_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 2004 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 
4 2005 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 2006 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 2007 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 2008 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 2009 // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 2010 // CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 2011 // CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 2012 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 2013 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 2014 // CHECK2-NEXT: [[PARTIAL_SUM5:%.*]] = alloca %"class.std::complex.0", align 8 2015 // CHECK2-NEXT: [[REF_TMP:%.*]] = alloca double, align 8 2016 // CHECK2-NEXT: [[REF_TMP6:%.*]] = alloca double, align 8 2017 // CHECK2-NEXT: [[I7:%.*]] = alloca i32, align 4 2018 // CHECK2-NEXT: [[REF_TMP14:%.*]] = alloca %"class.std::complex.0", align 8 2019 // CHECK2-NEXT: [[REF_TMP15:%.*]] = alloca double, align 8 2020 // CHECK2-NEXT: [[REF_TMP16:%.*]] = alloca double, align 8 2021 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 2022 // CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12]] 2023 // CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] 2024 // CHECK2-NEXT: store i32* [[ISTART]], i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] 2025 // CHECK2-NEXT: store i32* [[IEND]], i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] 2026 // CHECK2-NEXT: store %"class.std::complex.0"* [[PARTIAL_SUM]], %"class.std::complex.0"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] 2027 // CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] 2028 // CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] 2029 // CHECK2-NEXT: [[TMP2:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] 2030 // CHECK2-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* 2031 // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR5]] 2032 // CHECK2-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* 2033 // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR5]] 2034 // CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4, !tbaa [[TBAA8]] 2035 // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] 2036 // CHECK2-NEXT: [[TMP6:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* 2037 // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR5]] 2038 // CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA8]] 2039 // CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] 2040 // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* 2041 // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR5]] 2042 // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] 2043 // CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] 2044 // CHECK2-NEXT: [[SUB:%.*]] = sub i32 [[TMP9]], [[TMP10]] 2045 // CHECK2-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 2046 // CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 2047 // CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 2048 // CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 2049 // CHECK2-NEXT: store i32 [[SUB4]], i32* 
[[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] 2050 // CHECK2-NEXT: [[TMP11:%.*]] = bitcast i32* [[I]] to i8* 2051 // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP11]]) #[[ATTR5]] 2052 // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] 2053 // CHECK2-NEXT: store i32 [[TMP12]], i32* [[I]], align 4, !tbaa [[TBAA8]] 2054 // CHECK2-NEXT: [[TMP13:%.*]] = bitcast i32* [[I]] to i8* 2055 // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP13]]) #[[ATTR5]] 2056 // CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] 2057 // CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] 2058 // CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP14]], [[TMP15]] 2059 // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 2060 // CHECK2: omp.precond.then: 2061 // CHECK2-NEXT: [[TMP16:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* 2062 // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR5]] 2063 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 2064 // CHECK2-NEXT: [[TMP17:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* 2065 // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR5]] 2066 // CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] 2067 // CHECK2-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 2068 // CHECK2-NEXT: [[TMP19:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* 2069 // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP19]]) #[[ATTR5]] 2070 // CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] 2071 // CHECK2-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* 2072 // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR5]] 2073 // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] 2074 // CHECK2-NEXT: [[TMP21:%.*]] = bitcast %"class.std::complex.0"* [[PARTIAL_SUM5]] to i8* 2075 // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP21]]) #[[ATTR5]] 2076 // CHECK2-NEXT: [[TMP22:%.*]] = bitcast double* [[REF_TMP]] to i8* 2077 // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP22]]) #[[ATTR5]] 2078 // CHECK2-NEXT: store double 0.000000e+00, double* [[REF_TMP]], align 8, !tbaa [[TBAA22]] 2079 // CHECK2-NEXT: [[TMP23:%.*]] = bitcast double* [[REF_TMP6]] to i8* 2080 // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP23]]) #[[ATTR5]] 2081 // CHECK2-NEXT: store double 0.000000e+00, double* [[REF_TMP6]], align 8, !tbaa [[TBAA22]] 2082 // CHECK2-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR10]] 2083 // CHECK2-NEXT: [[TMP24:%.*]] = bitcast double* [[REF_TMP6]] to i8* 2084 // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP24]]) #[[ATTR5]] 2085 // CHECK2-NEXT: [[TMP25:%.*]] = bitcast double* [[REF_TMP]] to i8* 2086 // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP25]]) #[[ATTR5]] 2087 // CHECK2-NEXT: [[TMP26:%.*]] = bitcast i32* [[I7]] to i8* 2088 // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP26]]) #[[ATTR5]] 2089 // CHECK2-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 2090 // 
CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4, !tbaa [[TBAA8]] 2091 // CHECK2-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB3]], i32 [[TMP28]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 2092 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] 2093 // CHECK2: omp.dispatch.cond: 2094 // CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 2095 // CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] 2096 // CHECK2-NEXT: [[CMP8:%.*]] = icmp ugt i32 [[TMP29]], [[TMP30]] 2097 // CHECK2-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 2098 // CHECK2: cond.true: 2099 // CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] 2100 // CHECK2-NEXT: br label [[COND_END:%.*]] 2101 // CHECK2: cond.false: 2102 // CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 2103 // CHECK2-NEXT: br label [[COND_END]] 2104 // CHECK2: cond.end: 2105 // CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE]] ], [ [[TMP32]], [[COND_FALSE]] ] 2106 // CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 2107 // CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 2108 // CHECK2-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 2109 // CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 2110 // CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 2111 // CHECK2-NEXT: [[ADD9:%.*]] = add i32 [[TMP35]], 1 2112 // CHECK2-NEXT: [[CMP10:%.*]] = icmp ult i32 [[TMP34]], [[ADD9]] 2113 // CHECK2-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] 2114 // CHECK2: omp.dispatch.cleanup: 2115 // CHECK2-NEXT: br label [[OMP_DISPATCH_END:%.*]] 2116 // CHECK2: omp.dispatch.body: 2117 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 2118 // CHECK2: omp.inner.for.cond: 2119 // CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 2120 // CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 2121 // CHECK2-NEXT: [[ADD11:%.*]] = add i32 [[TMP37]], 1 2122 // CHECK2-NEXT: [[CMP12:%.*]] = icmp ult i32 [[TMP36]], [[ADD11]] 2123 // CHECK2-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] 2124 // CHECK2: omp.inner.for.cond.cleanup: 2125 // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] 2126 // CHECK2: omp.inner.for.body: 2127 // CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] 2128 // CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 2129 // CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP39]], 1 2130 // CHECK2-NEXT: [[ADD13:%.*]] = add i32 [[TMP38]], [[MUL]] 2131 // CHECK2-NEXT: store i32 [[ADD13]], i32* [[I7]], align 4, !tbaa [[TBAA8]] 2132 // CHECK2-NEXT: [[TMP40:%.*]] = bitcast %"class.std::complex.0"* [[REF_TMP14]] to i8* 2133 // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP40]]) #[[ATTR5]] 2134 // CHECK2-NEXT: [[TMP41:%.*]] = bitcast double* [[REF_TMP15]] to i8* 2135 // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP41]]) #[[ATTR5]] 2136 // CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] 2137 // CHECK2-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP42]] to double 
2138 // CHECK2-NEXT: store double [[CONV]], double* [[REF_TMP15]], align 8, !tbaa [[TBAA22]] 2139 // CHECK2-NEXT: [[TMP43:%.*]] = bitcast double* [[REF_TMP16]] to i8* 2140 // CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP43]]) #[[ATTR5]] 2141 // CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] 2142 // CHECK2-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP44]] to double 2143 // CHECK2-NEXT: store double [[CONV17]], double* [[REF_TMP16]], align 8, !tbaa [[TBAA22]] 2144 // CHECK2-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]], double* nonnull align 8 dereferenceable(8) [[REF_TMP15]], double* nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR10]] 2145 // CHECK2-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR10]] 2146 // CHECK2-NEXT: [[TMP45:%.*]] = bitcast double* [[REF_TMP16]] to i8* 2147 // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP45]]) #[[ATTR5]] 2148 // CHECK2-NEXT: [[TMP46:%.*]] = bitcast double* [[REF_TMP15]] to i8* 2149 // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP46]]) #[[ATTR5]] 2150 // CHECK2-NEXT: [[TMP47:%.*]] = bitcast %"class.std::complex.0"* [[REF_TMP14]] to i8* 2151 // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP47]]) #[[ATTR5]] 2152 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 2153 // CHECK2: omp.body.continue: 2154 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 2155 // CHECK2: omp.inner.for.inc: 2156 // CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 2157 // CHECK2-NEXT: [[ADD18:%.*]] = add i32 [[TMP48]], 1 2158 // CHECK2-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 2159 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] 2160 // CHECK2: omp.inner.for.end: 2161 // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] 2162 // CHECK2: omp.dispatch.inc: 2163 // CHECK2-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 2164 // CHECK2-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] 2165 // CHECK2-NEXT: [[ADD19:%.*]] = add i32 [[TMP49]], [[TMP50]] 2166 // CHECK2-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 2167 // CHECK2-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 2168 // CHECK2-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] 2169 // CHECK2-NEXT: [[ADD20:%.*]] = add i32 [[TMP51]], [[TMP52]] 2170 // CHECK2-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 2171 // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] 2172 // CHECK2: omp.dispatch.end: 2173 // CHECK2-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 2174 // CHECK2-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4, !tbaa [[TBAA8]] 2175 // CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP54]]) 2176 // CHECK2-NEXT: [[TMP55:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 2177 // CHECK2-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4, !tbaa [[TBAA8]] 2178 // CHECK2-NEXT: [[TMP57:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 2179 // CHECK2-NEXT: [[TMP58:%.*]] = 
bitcast %"class.std::complex.0"* [[PARTIAL_SUM5]] to i8* 2180 // CHECK2-NEXT: store i8* [[TMP58]], i8** [[TMP57]], align 8 2181 // CHECK2-NEXT: [[TMP59:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* 2182 // CHECK2-NEXT: [[TMP60:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP56]], i32 1, i64 8, i8* [[TMP59]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func5, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func6) 2183 // CHECK2-NEXT: [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1 2184 // CHECK2-NEXT: br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] 2185 // CHECK2: .omp.reduction.then: 2186 // CHECK2-NEXT: [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR10]] 2187 // CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]]) 2188 // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]] 2189 // CHECK2: .omp.reduction.done: 2190 // CHECK2-NEXT: [[TMP62:%.*]] = bitcast i32* [[I7]] to i8* 2191 // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR5]] 2192 // CHECK2-NEXT: [[TMP63:%.*]] = bitcast %"class.std::complex.0"* [[PARTIAL_SUM5]] to i8* 2193 // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP63]]) #[[ATTR5]] 2194 // CHECK2-NEXT: [[TMP64:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* 2195 // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR5]] 2196 // CHECK2-NEXT: [[TMP65:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* 2197 // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR5]] 2198 // CHECK2-NEXT: [[TMP66:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* 2199 // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR5]] 2200 // CHECK2-NEXT: [[TMP67:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* 2201 // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR5]] 2202 // CHECK2-NEXT: br label [[OMP_PRECOND_END]] 2203 // CHECK2: omp.precond.end: 2204 // CHECK2-NEXT: [[TMP68:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* 2205 // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR5]] 2206 // CHECK2-NEXT: [[TMP69:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* 2207 // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR5]] 2208 // CHECK2-NEXT: [[TMP70:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* 2209 // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR5]] 2210 // CHECK2-NEXT: [[TMP71:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* 2211 // CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR5]] 2212 // CHECK2-NEXT: ret void 2213 // 2214 // 2215 // CHECK2-LABEL: define {{[^@]+}}@_ZNSt7complexIdEpLIdEERS0_RKS_IT_E 2216 // CHECK2-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[__C:%.*]]) #[[ATTR4]] comdat align 2 { 2217 // CHECK2-NEXT: entry: 2218 // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 2219 // CHECK2-NEXT: [[__C_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 2220 // CHECK2-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] 2221 // 
CHECK2-NEXT: store %"class.std::complex.0"* [[__C]], %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] 2222 // CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 2223 // CHECK2-NEXT: [[TMP0:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] 2224 // CHECK2-NEXT: [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP0]]) #[[ATTR10]] 2225 // CHECK2-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 0 2226 // CHECK2-NEXT: [[TMP1:%.*]] = load double, double* [[__RE_]], align 8, !tbaa [[TBAA24:![0-9]+]] 2227 // CHECK2-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[CALL]] 2228 // CHECK2-NEXT: store double [[ADD]], double* [[__RE_]], align 8, !tbaa [[TBAA24]] 2229 // CHECK2-NEXT: [[TMP2:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] 2230 // CHECK2-NEXT: [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]]) #[[ATTR10]] 2231 // CHECK2-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 1 2232 // CHECK2-NEXT: [[TMP3:%.*]] = load double, double* [[__IM_]], align 8, !tbaa [[TBAA26:![0-9]+]] 2233 // CHECK2-NEXT: [[ADD3:%.*]] = fadd double [[TMP3]], [[CALL2]] 2234 // CHECK2-NEXT: store double [[ADD3]], double* [[__IM_]], align 8, !tbaa [[TBAA26]] 2235 // CHECK2-NEXT: ret %"class.std::complex.0"* [[THIS1]] 2236 // 2237 // 2238 // CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func5 2239 // CHECK2-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR6]] { 2240 // CHECK2-NEXT: entry: 2241 // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 2242 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 2243 // CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 2244 // CHECK2-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 2245 // CHECK2-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x i8*], align 8 2246 // CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca %"class.std::complex.0", align 8 2247 // CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] 2248 // CHECK2-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2, !tbaa [[TBAA19]] 2249 // CHECK2-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2, !tbaa [[TBAA19]] 2250 // CHECK2-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2, !tbaa [[TBAA19]] 2251 // CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] 2252 // CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]* 2253 // CHECK2-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2, !tbaa [[TBAA19]] 2254 // CHECK2-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2, !tbaa [[TBAA19]] 2255 // CHECK2-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2, !tbaa [[TBAA19]] 2256 // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 2257 // CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 2258 // CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 2259 // CHECK2-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to %"class.std::complex.0"* 2260 // CHECK2-NEXT: [[TMP13:%.*]] = 
getelementptr %"class.std::complex.0", %"class.std::complex.0"* [[TMP12]], i64 1 2261 // CHECK2-NEXT: [[TMP14:%.*]] = bitcast %"class.std::complex.0"* [[TMP13]] to i8* 2262 // CHECK2-NEXT: [[TMP15:%.*]] = bitcast %"class.std::complex.0"* [[TMP12]] to i64* 2263 // CHECK2-NEXT: [[TMP16:%.*]] = bitcast %"class.std::complex.0"* [[DOTOMP_REDUCTION_ELEMENT]] to i64* 2264 // CHECK2-NEXT: br label [[DOTSHUFFLE_PRE_COND:%.*]] 2265 // CHECK2: .shuffle.pre_cond: 2266 // CHECK2-NEXT: [[TMP17:%.*]] = phi i64* [ [[TMP15]], [[ENTRY:%.*]] ], [ [[TMP29:%.*]], [[DOTSHUFFLE_THEN:%.*]] ] 2267 // CHECK2-NEXT: [[TMP18:%.*]] = phi i64* [ [[TMP16]], [[ENTRY]] ], [ [[TMP30:%.*]], [[DOTSHUFFLE_THEN]] ] 2268 // CHECK2-NEXT: [[TMP19:%.*]] = bitcast i64* [[TMP17]] to i8* 2269 // CHECK2-NEXT: [[TMP20:%.*]] = ptrtoint i8* [[TMP14]] to i64 2270 // CHECK2-NEXT: [[TMP21:%.*]] = ptrtoint i8* [[TMP19]] to i64 2271 // CHECK2-NEXT: [[TMP22:%.*]] = sub i64 [[TMP20]], [[TMP21]] 2272 // CHECK2-NEXT: [[TMP23:%.*]] = sdiv exact i64 [[TMP22]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) 2273 // CHECK2-NEXT: [[TMP24:%.*]] = icmp sgt i64 [[TMP23]], 7 2274 // CHECK2-NEXT: br i1 [[TMP24]], label [[DOTSHUFFLE_THEN]], label [[DOTSHUFFLE_EXIT:%.*]] 2275 // CHECK2: .shuffle.then: 2276 // CHECK2-NEXT: [[TMP25:%.*]] = load i64, i64* [[TMP17]], align 8 2277 // CHECK2-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_get_warp_size() 2278 // CHECK2-NEXT: [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16 2279 // CHECK2-NEXT: [[TMP28:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP25]], i16 [[TMP7]], i16 [[TMP27]]) 2280 // CHECK2-NEXT: store i64 [[TMP28]], i64* [[TMP18]], align 8 2281 // CHECK2-NEXT: [[TMP29]] = getelementptr i64, i64* [[TMP17]], i64 1 2282 // CHECK2-NEXT: [[TMP30]] = getelementptr i64, i64* [[TMP18]], i64 1 2283 // CHECK2-NEXT: br label [[DOTSHUFFLE_PRE_COND]] 2284 // CHECK2: .shuffle.exit: 2285 // CHECK2-NEXT: [[TMP31:%.*]] = bitcast %"class.std::complex.0"* [[DOTOMP_REDUCTION_ELEMENT]] to i8* 2286 // CHECK2-NEXT: store i8* [[TMP31]], i8** [[TMP11]], align 8, !tbaa [[TBAA12]] 2287 // CHECK2-NEXT: [[TMP32:%.*]] = icmp eq i16 [[TMP8]], 0 2288 // CHECK2-NEXT: [[TMP33:%.*]] = icmp eq i16 [[TMP8]], 1 2289 // CHECK2-NEXT: [[TMP34:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] 2290 // CHECK2-NEXT: [[TMP35:%.*]] = and i1 [[TMP33]], [[TMP34]] 2291 // CHECK2-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 2 2292 // CHECK2-NEXT: [[TMP37:%.*]] = and i16 [[TMP6]], 1 2293 // CHECK2-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP37]], 0 2294 // CHECK2-NEXT: [[TMP39:%.*]] = and i1 [[TMP36]], [[TMP38]] 2295 // CHECK2-NEXT: [[TMP40:%.*]] = icmp sgt i16 [[TMP7]], 0 2296 // CHECK2-NEXT: [[TMP41:%.*]] = and i1 [[TMP39]], [[TMP40]] 2297 // CHECK2-NEXT: [[TMP42:%.*]] = or i1 [[TMP32]], [[TMP35]] 2298 // CHECK2-NEXT: [[TMP43:%.*]] = or i1 [[TMP42]], [[TMP41]] 2299 // CHECK2-NEXT: br i1 [[TMP43]], label [[THEN:%.*]], label [[ELSE:%.*]] 2300 // CHECK2: then: 2301 // CHECK2-NEXT: [[TMP44:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* 2302 // CHECK2-NEXT: [[TMP45:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* 2303 // CHECK2-NEXT: call void @"_omp$reduction$reduction_func4"(i8* [[TMP44]], i8* [[TMP45]]) #[[ATTR5]] 2304 // CHECK2-NEXT: br label [[IFCONT:%.*]] 2305 // CHECK2: else: 2306 // CHECK2-NEXT: br label [[IFCONT]] 2307 // CHECK2: ifcont: 2308 // CHECK2-NEXT: [[TMP46:%.*]] = icmp eq i16 [[TMP8]], 1 2309 // CHECK2-NEXT: [[TMP47:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] 2310 // CHECK2-NEXT: [[TMP48:%.*]] = and i1 [[TMP46]], [[TMP47]] 2311 // CHECK2-NEXT: 
br i1 [[TMP48]], label [[THEN4:%.*]], label [[ELSE5:%.*]] 2312 // CHECK2: then4: 2313 // CHECK2-NEXT: [[TMP49:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 2314 // CHECK2-NEXT: [[TMP50:%.*]] = load i8*, i8** [[TMP49]], align 8 2315 // CHECK2-NEXT: [[TMP51:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 2316 // CHECK2-NEXT: [[TMP52:%.*]] = load i8*, i8** [[TMP51]], align 8 2317 // CHECK2-NEXT: [[TMP53:%.*]] = bitcast i8* [[TMP50]] to %"class.std::complex.0"* 2318 // CHECK2-NEXT: [[TMP54:%.*]] = bitcast i8* [[TMP52]] to %"class.std::complex.0"* 2319 // CHECK2-NEXT: [[TMP55:%.*]] = bitcast %"class.std::complex.0"* [[TMP54]] to i8* 2320 // CHECK2-NEXT: [[TMP56:%.*]] = bitcast %"class.std::complex.0"* [[TMP53]] to i8* 2321 // CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP55]], i8* align 8 [[TMP56]], i64 16, i1 false), !tbaa.struct !27 2322 // CHECK2-NEXT: br label [[IFCONT6:%.*]] 2323 // CHECK2: else5: 2324 // CHECK2-NEXT: br label [[IFCONT6]] 2325 // CHECK2: ifcont6: 2326 // CHECK2-NEXT: ret void 2327 // 2328 // 2329 // CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func6 2330 // CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR6]] { 2331 // CHECK2-NEXT: entry: 2332 // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 2333 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 2334 // CHECK2-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4 2335 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 2336 // CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] 2337 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] 2338 // CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 2339 // CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 2340 // CHECK2-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31 2341 // CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 2342 // CHECK2-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5 2343 // CHECK2-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8 2344 // CHECK2-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to [1 x i8*]* 2345 // CHECK2-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] 2346 // CHECK2-NEXT: br label [[PRECOND:%.*]] 2347 // CHECK2: precond: 2348 // CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] 2349 // CHECK2-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 4 2350 // CHECK2-NEXT: br i1 [[TMP9]], label [[BODY:%.*]], label [[EXIT:%.*]] 2351 // CHECK2: body: 2352 // CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) 2353 // CHECK2-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 2354 // CHECK2-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] 2355 // CHECK2: then: 2356 // CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i64 0, i64 0 2357 // CHECK2-NEXT: [[TMP11:%.*]] = load i8*, i8** [[TMP10]], align 8, !tbaa [[TBAA12]] 2358 // CHECK2-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP11]] to i32* 2359 // CHECK2-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 [[TMP8]] 2360 // CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] 2361 // CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* 
[[TMP13]], align 4 2362 // CHECK2-NEXT: store volatile i32 [[TMP15]], i32 addrspace(3)* [[TMP14]], align 4 2363 // CHECK2-NEXT: br label [[IFCONT:%.*]] 2364 // CHECK2: else: 2365 // CHECK2-NEXT: br label [[IFCONT]] 2366 // CHECK2: ifcont: 2367 // CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) 2368 // CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] 2369 // CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP16]] 2370 // CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] 2371 // CHECK2: then2: 2372 // CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] 2373 // CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i64 0, i64 0 2374 // CHECK2-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8, !tbaa [[TBAA12]] 2375 // CHECK2-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to i32* 2376 // CHECK2-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[TMP20]], i32 [[TMP8]] 2377 // CHECK2-NEXT: [[TMP22:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP17]], align 4, !tbaa [[TBAA8]] 2378 // CHECK2-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 4, !tbaa [[TBAA8]] 2379 // CHECK2-NEXT: br label [[IFCONT4:%.*]] 2380 // CHECK2: else3: 2381 // CHECK2-NEXT: br label [[IFCONT4]] 2382 // CHECK2: ifcont4: 2383 // CHECK2-NEXT: [[TMP23:%.*]] = add nsw i32 [[TMP8]], 1 2384 // CHECK2-NEXT: store i32 [[TMP23]], i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] 2385 // CHECK2-NEXT: br label [[PRECOND]] 2386 // CHECK2: exit: 2387 // CHECK2-NEXT: ret void 2388 // 2389 // 2390 // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper 2391 // CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR6]] { 2392 // CHECK2-NEXT: entry: 2393 // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 2394 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 2395 // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 2396 // CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 2397 // CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA19]] 2398 // CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] 2399 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 2400 // CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 2401 // CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 2402 // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 2403 // CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** 2404 // CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8, !tbaa [[TBAA12]] 2405 // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 1 2406 // CHECK2-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32** 2407 // CHECK2-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8, !tbaa [[TBAA12]] 2408 // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 2 2409 // CHECK2-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to %"class.std::complex.0"** 2410 // CHECK2-NEXT: [[TMP11:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[TMP10]], align 8, !tbaa [[TBAA12]] 2411 // CHECK2-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]], %"class.std::complex.0"* [[TMP11]]) #[[ATTR5]] 2412 // CHECK2-NEXT: ret void 2413 // 2414 // 2415 // CHECK2-LABEL: 
define {{[^@]+}}@_ZNSt7complexIfEC2ERKfS2_ 2416 // CHECK2-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]], float* nonnull align 4 dereferenceable(4) [[__RE:%.*]], float* nonnull align 4 dereferenceable(4) [[__IM:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 { 2417 // CHECK2-NEXT: entry: 2418 // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 2419 // CHECK2-NEXT: [[__RE_ADDR:%.*]] = alloca float*, align 8 2420 // CHECK2-NEXT: [[__IM_ADDR:%.*]] = alloca float*, align 8 2421 // CHECK2-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] 2422 // CHECK2-NEXT: store float* [[__RE]], float** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] 2423 // CHECK2-NEXT: store float* [[__IM]], float** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] 2424 // CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 2425 // CHECK2-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0 2426 // CHECK2-NEXT: [[TMP0:%.*]] = load float*, float** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] 2427 // CHECK2-NEXT: [[TMP1:%.*]] = load float, float* [[TMP0]], align 4, !tbaa [[TBAA14]] 2428 // CHECK2-NEXT: store float [[TMP1]], float* [[__RE_]], align 4, !tbaa [[TBAA16]] 2429 // CHECK2-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1 2430 // CHECK2-NEXT: [[TMP2:%.*]] = load float*, float** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] 2431 // CHECK2-NEXT: [[TMP3:%.*]] = load float, float* [[TMP2]], align 4, !tbaa [[TBAA14]] 2432 // CHECK2-NEXT: store float [[TMP3]], float* [[__IM_]], align 4, !tbaa [[TBAA18]] 2433 // CHECK2-NEXT: ret void 2434 // 2435 // 2436 // CHECK2-LABEL: define {{[^@]+}}@_ZNKSt7complexIfE4realEv 2437 // CHECK2-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]]) #[[ATTR4]] comdat align 2 { 2438 // CHECK2-NEXT: entry: 2439 // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 2440 // CHECK2-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] 2441 // CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 2442 // CHECK2-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0 2443 // CHECK2-NEXT: [[TMP0:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA16]] 2444 // CHECK2-NEXT: ret float [[TMP0]] 2445 // 2446 // 2447 // CHECK2-LABEL: define {{[^@]+}}@_ZNKSt7complexIfE4imagEv 2448 // CHECK2-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]]) #[[ATTR4]] comdat align 2 { 2449 // CHECK2-NEXT: entry: 2450 // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 2451 // CHECK2-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] 2452 // CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 2453 // CHECK2-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1 2454 // CHECK2-NEXT: [[TMP0:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA18]] 2455 // CHECK2-NEXT: ret float [[TMP0]] 2456 // 2457 // 2458 // CHECK2-LABEL: define {{[^@]+}}@_ZNSt7complexIdEC2ERKdS2_ 2459 // CHECK2-SAME: (%"class.std::complex.0"* nonnull align 
8 dereferenceable(16) [[THIS:%.*]], double* nonnull align 8 dereferenceable(8) [[__RE:%.*]], double* nonnull align 8 dereferenceable(8) [[__IM:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 { 2460 // CHECK2-NEXT: entry: 2461 // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 2462 // CHECK2-NEXT: [[__RE_ADDR:%.*]] = alloca double*, align 8 2463 // CHECK2-NEXT: [[__IM_ADDR:%.*]] = alloca double*, align 8 2464 // CHECK2-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] 2465 // CHECK2-NEXT: store double* [[__RE]], double** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] 2466 // CHECK2-NEXT: store double* [[__IM]], double** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] 2467 // CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 2468 // CHECK2-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 0 2469 // CHECK2-NEXT: [[TMP0:%.*]] = load double*, double** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] 2470 // CHECK2-NEXT: [[TMP1:%.*]] = load double, double* [[TMP0]], align 8, !tbaa [[TBAA22]] 2471 // CHECK2-NEXT: store double [[TMP1]], double* [[__RE_]], align 8, !tbaa [[TBAA24]] 2472 // CHECK2-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 1 2473 // CHECK2-NEXT: [[TMP2:%.*]] = load double*, double** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] 2474 // CHECK2-NEXT: [[TMP3:%.*]] = load double, double* [[TMP2]], align 8, !tbaa [[TBAA22]] 2475 // CHECK2-NEXT: store double [[TMP3]], double* [[__IM_]], align 8, !tbaa [[TBAA26]] 2476 // CHECK2-NEXT: ret void 2477 // 2478 // 2479 // CHECK2-LABEL: define {{[^@]+}}@_ZNKSt7complexIdE4realEv 2480 // CHECK2-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR4]] comdat align 2 { 2481 // CHECK2-NEXT: entry: 2482 // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 2483 // CHECK2-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] 2484 // CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 2485 // CHECK2-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 0 2486 // CHECK2-NEXT: [[TMP0:%.*]] = load double, double* [[__RE_]], align 8, !tbaa [[TBAA24]] 2487 // CHECK2-NEXT: ret double [[TMP0]] 2488 // 2489 // 2490 // CHECK2-LABEL: define {{[^@]+}}@_ZNKSt7complexIdE4imagEv 2491 // CHECK2-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR4]] comdat align 2 { 2492 // CHECK2-NEXT: entry: 2493 // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 2494 // CHECK2-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] 2495 // CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 2496 // CHECK2-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 1 2497 // CHECK2-NEXT: [[TMP0:%.*]] = load double, double* [[__IM_]], align 8, !tbaa [[TBAA26]] 2498 // CHECK2-NEXT: ret double [[TMP0]] 2499 // 2500 // 2501 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16 2502 // CHECK3-SAME: () 
#[[ATTR0:[0-9]+]] { 2503 // CHECK3-NEXT: entry: 2504 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 2505 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 2506 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) 2507 // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 2508 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 2509 // CHECK3: user_code.entry: 2510 // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 2511 // CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 2512 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8:![0-9]+]] 2513 // CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]] 2514 // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 2515 // CHECK3-NEXT: ret void 2516 // CHECK3: worker.exit: 2517 // CHECK3-NEXT: ret void 2518 // 2519 // 2520 // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__ 2521 // CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 2522 // CHECK3-NEXT: entry: 2523 // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 2524 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 2525 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 2526 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 2527 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 2528 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 2529 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 2530 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 2531 // CHECK3-NEXT: [[IB:%.*]] = alloca i32, align 4 2532 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca float, align 4 2533 // CHECK3-NEXT: [[REF_TMP2:%.*]] = alloca float, align 4 2534 // CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 2535 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12:![0-9]+]] 2536 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] 2537 // CHECK3-NEXT: [[ISTART:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 4) 2538 // CHECK3-NEXT: [[ISTART_ON_STACK:%.*]] = bitcast i8* [[ISTART]] to i32* 2539 // CHECK3-NEXT: [[IEND:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 4) 2540 // CHECK3-NEXT: [[IEND_ON_STACK:%.*]] = bitcast i8* [[IEND]] to i32* 2541 // CHECK3-NEXT: [[PARTIAL_SUM:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 8) 2542 // CHECK3-NEXT: [[PARTIAL_SUM_ON_STACK:%.*]] = bitcast i8* [[PARTIAL_SUM]] to %"class.std::complex"* 2543 // CHECK3-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* 2544 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) #[[ATTR5]] 2545 // CHECK3-NEXT: [[TMP1:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* 2546 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP1]]) #[[ATTR5]] 2547 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 2548 // CHECK3-NEXT: [[TMP2:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* 2549 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP2]]) #[[ATTR5]] 2550 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 2551 // CHECK3-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* 2552 // CHECK3-NEXT: call void 
@llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR5]] 2553 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] 2554 // CHECK3-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* 2555 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR5]] 2556 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] 2557 // CHECK3-NEXT: [[TMP5:%.*]] = bitcast i32* [[IB]] to i8* 2558 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP5]]) #[[ATTR5]] 2559 // CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 2560 // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA8]] 2561 // CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 2562 // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 2563 // CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 99 2564 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 2565 // CHECK3: cond.true: 2566 // CHECK3-NEXT: br label [[COND_END:%.*]] 2567 // CHECK3: cond.false: 2568 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 2569 // CHECK3-NEXT: br label [[COND_END]] 2570 // CHECK3: cond.end: 2571 // CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] 2572 // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 2573 // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 2574 // CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 2575 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 2576 // CHECK3: omp.inner.for.cond: 2577 // CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 2578 // CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 2579 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] 2580 // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] 2581 // CHECK3: omp.inner.for.cond.cleanup: 2582 // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] 2583 // CHECK3: omp.inner.for.body: 2584 // CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 2585 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 2586 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 2587 // CHECK3-NEXT: store i32 [[ADD]], i32* [[IB]], align 4, !tbaa [[TBAA8]] 2588 // CHECK3-NEXT: [[TMP14:%.*]] = bitcast float* [[REF_TMP]] to i8* 2589 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP14]]) #[[ATTR5]] 2590 // CHECK3-NEXT: store float 0.000000e+00, float* [[REF_TMP]], align 4, !tbaa [[TBAA14:![0-9]+]] 2591 // CHECK3-NEXT: [[TMP15:%.*]] = bitcast float* [[REF_TMP2]] to i8* 2592 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP15]]) #[[ATTR5]] 2593 // CHECK3-NEXT: store float 0.000000e+00, float* [[REF_TMP2]], align 4, !tbaa [[TBAA14]] 2594 // CHECK3-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM_ON_STACK]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR10:[0-9]+]] 2595 // CHECK3-NEXT: [[TMP16:%.*]] = bitcast float* [[REF_TMP2]] to i8* 2596 // 
CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR5]] 2597 // CHECK3-NEXT: [[TMP17:%.*]] = bitcast float* [[REF_TMP]] to i8* 2598 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR5]] 2599 // CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] 2600 // CHECK3-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP18]], 4 2601 // CHECK3-NEXT: store i32 [[MUL3]], i32* [[ISTART_ON_STACK]], align 4, !tbaa [[TBAA8]] 2602 // CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] 2603 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 2604 // CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[ADD4]], 4 2605 // CHECK3-NEXT: store i32 [[MUL5]], i32* [[IEND_ON_STACK]], align 4, !tbaa [[TBAA8]] 2606 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 2607 // CHECK3-NEXT: [[TMP21:%.*]] = bitcast i32* [[ISTART_ON_STACK]] to i8* 2608 // CHECK3-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8, !tbaa [[TBAA12]] 2609 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 2610 // CHECK3-NEXT: [[TMP23:%.*]] = bitcast i32* [[IEND_ON_STACK]] to i8* 2611 // CHECK3-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8, !tbaa [[TBAA12]] 2612 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 2613 // CHECK3-NEXT: [[TMP25:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM_ON_STACK]] to i8* 2614 // CHECK3-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8, !tbaa [[TBAA12]] 2615 // CHECK3-NEXT: [[TMP26:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 2616 // CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i32*, %"class.std::complex"*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP26]], i64 3) 2617 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 2618 // CHECK3: omp.body.continue: 2619 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 2620 // CHECK3: omp.inner.for.inc: 2621 // CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 2622 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP27]], 1 2623 // CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 2624 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] 2625 // CHECK3: omp.inner.for.end: 2626 // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 2627 // CHECK3: omp.loop.exit: 2628 // CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]]) 2629 // CHECK3-NEXT: [[TMP28:%.*]] = bitcast i32* [[IB]] to i8* 2630 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP28]]) #[[ATTR5]] 2631 // CHECK3-NEXT: [[TMP29:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* 2632 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP29]]) #[[ATTR5]] 2633 // CHECK3-NEXT: [[TMP30:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* 2634 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP30]]) #[[ATTR5]] 2635 // CHECK3-NEXT: [[TMP31:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* 2636 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP31]]) #[[ATTR5]] 2637 // CHECK3-NEXT: [[TMP32:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* 2638 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP32]]) #[[ATTR5]] 2639 // CHECK3-NEXT: 
[[TMP33:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* 2640 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR5]] 2641 // CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[PARTIAL_SUM]], i64 8) 2642 // CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[IEND]], i64 4) 2643 // CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[ISTART]], i64 4) 2644 // CHECK3-NEXT: ret void 2645 // 2646 // 2647 // CHECK3-LABEL: define {{[^@]+}}@_ZNSt7complexIfEC1ERKfS2_ 2648 // CHECK3-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]], float* nonnull align 4 dereferenceable(4) [[__RE:%.*]], float* nonnull align 4 dereferenceable(4) [[__IM:%.*]]) unnamed_addr #[[ATTR3:[0-9]+]] comdat align 2 { 2649 // CHECK3-NEXT: entry: 2650 // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 2651 // CHECK3-NEXT: [[__RE_ADDR:%.*]] = alloca float*, align 8 2652 // CHECK3-NEXT: [[__IM_ADDR:%.*]] = alloca float*, align 8 2653 // CHECK3-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] 2654 // CHECK3-NEXT: store float* [[__RE]], float** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] 2655 // CHECK3-NEXT: store float* [[__IM]], float** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] 2656 // CHECK3-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 2657 // CHECK3-NEXT: [[TMP0:%.*]] = load float*, float** [[__RE_ADDR]], align 8 2658 // CHECK3-NEXT: [[TMP1:%.*]] = load float*, float** [[__IM_ADDR]], align 8 2659 // CHECK3-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR10]] 2660 // CHECK3-NEXT: ret void 2661 // 2662 // 2663 // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1 2664 // CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ISTART:%.*]], i32* nonnull align 4 dereferenceable(4) [[IEND:%.*]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM:%.*]]) #[[ATTR0]] { 2665 // CHECK3-NEXT: entry: 2666 // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 2667 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 2668 // CHECK3-NEXT: [[ISTART_ADDR:%.*]] = alloca i32*, align 8 2669 // CHECK3-NEXT: [[IEND_ADDR:%.*]] = alloca i32*, align 8 2670 // CHECK3-NEXT: [[PARTIAL_SUM_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 2671 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 2672 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 2673 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 2674 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 2675 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 2676 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 2677 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 2678 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 2679 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 2680 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 2681 // CHECK3-NEXT: [[PARTIAL_SUM5:%.*]] = alloca %"class.std::complex", align 4 2682 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca float, align 4 2683 // CHECK3-NEXT: [[REF_TMP6:%.*]] = alloca float, align 4 2684 // CHECK3-NEXT: [[I7:%.*]] = alloca i32, align 4 2685 // CHECK3-NEXT: [[REF_TMP14:%.*]] = alloca %"class.std::complex", align 4 2686 // 
CHECK3-NEXT: [[REF_TMP15:%.*]] = alloca float, align 4 2687 // CHECK3-NEXT: [[REF_TMP16:%.*]] = alloca float, align 4 2688 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 2689 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12]] 2690 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] 2691 // CHECK3-NEXT: store i32* [[ISTART]], i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] 2692 // CHECK3-NEXT: store i32* [[IEND]], i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] 2693 // CHECK3-NEXT: store %"class.std::complex"* [[PARTIAL_SUM]], %"class.std::complex"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] 2694 // CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] 2695 // CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] 2696 // CHECK3-NEXT: [[TMP2:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] 2697 // CHECK3-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* 2698 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR5]] 2699 // CHECK3-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* 2700 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR5]] 2701 // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4, !tbaa [[TBAA8]] 2702 // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] 2703 // CHECK3-NEXT: [[TMP6:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* 2704 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR5]] 2705 // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA8]] 2706 // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] 2707 // CHECK3-NEXT: [[TMP8:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* 2708 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR5]] 2709 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] 2710 // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] 2711 // CHECK3-NEXT: [[SUB:%.*]] = sub i32 [[TMP9]], [[TMP10]] 2712 // CHECK3-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 2713 // CHECK3-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 2714 // CHECK3-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 2715 // CHECK3-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 2716 // CHECK3-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] 2717 // CHECK3-NEXT: [[TMP11:%.*]] = bitcast i32* [[I]] to i8* 2718 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP11]]) #[[ATTR5]] 2719 // CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] 2720 // CHECK3-NEXT: store i32 [[TMP12]], i32* [[I]], align 4, !tbaa [[TBAA8]] 2721 // CHECK3-NEXT: [[TMP13:%.*]] = bitcast i32* [[I]] to i8* 2722 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP13]]) #[[ATTR5]] 2723 // CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] 2724 // CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] 2725 // CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP14]], [[TMP15]] 2726 // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 2727 // CHECK3: omp.precond.then: 2728 // 
CHECK3-NEXT: [[TMP16:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* 2729 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR5]] 2730 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 2731 // CHECK3-NEXT: [[TMP17:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* 2732 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR5]] 2733 // CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] 2734 // CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 2735 // CHECK3-NEXT: [[TMP19:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* 2736 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP19]]) #[[ATTR5]] 2737 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] 2738 // CHECK3-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* 2739 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR5]] 2740 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] 2741 // CHECK3-NEXT: [[TMP21:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM5]] to i8* 2742 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP21]]) #[[ATTR5]] 2743 // CHECK3-NEXT: [[TMP22:%.*]] = bitcast float* [[REF_TMP]] to i8* 2744 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP22]]) #[[ATTR5]] 2745 // CHECK3-NEXT: store float 0.000000e+00, float* [[REF_TMP]], align 4, !tbaa [[TBAA14]] 2746 // CHECK3-NEXT: [[TMP23:%.*]] = bitcast float* [[REF_TMP6]] to i8* 2747 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP23]]) #[[ATTR5]] 2748 // CHECK3-NEXT: store float 0.000000e+00, float* [[REF_TMP6]], align 4, !tbaa [[TBAA14]] 2749 // CHECK3-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR10]] 2750 // CHECK3-NEXT: [[TMP24:%.*]] = bitcast float* [[REF_TMP6]] to i8* 2751 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP24]]) #[[ATTR5]] 2752 // CHECK3-NEXT: [[TMP25:%.*]] = bitcast float* [[REF_TMP]] to i8* 2753 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP25]]) #[[ATTR5]] 2754 // CHECK3-NEXT: [[TMP26:%.*]] = bitcast i32* [[I7]] to i8* 2755 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP26]]) #[[ATTR5]] 2756 // CHECK3-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 2757 // CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4, !tbaa [[TBAA8]] 2758 // CHECK3-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP28]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 2759 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] 2760 // CHECK3: omp.dispatch.cond: 2761 // CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 2762 // CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] 2763 // CHECK3-NEXT: [[CMP8:%.*]] = icmp ugt i32 [[TMP29]], [[TMP30]] 2764 // CHECK3-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 2765 // CHECK3: cond.true: 2766 // CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] 2767 // CHECK3-NEXT: br label [[COND_END:%.*]] 2768 // CHECK3: cond.false: 2769 // 
CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 2770 // CHECK3-NEXT: br label [[COND_END]] 2771 // CHECK3: cond.end: 2772 // CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE]] ], [ [[TMP32]], [[COND_FALSE]] ] 2773 // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 2774 // CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 2775 // CHECK3-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 2776 // CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 2777 // CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 2778 // CHECK3-NEXT: [[ADD9:%.*]] = add i32 [[TMP35]], 1 2779 // CHECK3-NEXT: [[CMP10:%.*]] = icmp ult i32 [[TMP34]], [[ADD9]] 2780 // CHECK3-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] 2781 // CHECK3: omp.dispatch.cleanup: 2782 // CHECK3-NEXT: br label [[OMP_DISPATCH_END:%.*]] 2783 // CHECK3: omp.dispatch.body: 2784 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 2785 // CHECK3: omp.inner.for.cond: 2786 // CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 2787 // CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 2788 // CHECK3-NEXT: [[ADD11:%.*]] = add i32 [[TMP37]], 1 2789 // CHECK3-NEXT: [[CMP12:%.*]] = icmp ult i32 [[TMP36]], [[ADD11]] 2790 // CHECK3-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] 2791 // CHECK3: omp.inner.for.cond.cleanup: 2792 // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] 2793 // CHECK3: omp.inner.for.body: 2794 // CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] 2795 // CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 2796 // CHECK3-NEXT: [[MUL:%.*]] = mul i32 [[TMP39]], 1 2797 // CHECK3-NEXT: [[ADD13:%.*]] = add i32 [[TMP38]], [[MUL]] 2798 // CHECK3-NEXT: store i32 [[ADD13]], i32* [[I7]], align 4, !tbaa [[TBAA8]] 2799 // CHECK3-NEXT: [[TMP40:%.*]] = bitcast %"class.std::complex"* [[REF_TMP14]] to i8* 2800 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP40]]) #[[ATTR5]] 2801 // CHECK3-NEXT: [[TMP41:%.*]] = bitcast float* [[REF_TMP15]] to i8* 2802 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP41]]) #[[ATTR5]] 2803 // CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] 2804 // CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP42]] to float 2805 // CHECK3-NEXT: store float [[CONV]], float* [[REF_TMP15]], align 4, !tbaa [[TBAA14]] 2806 // CHECK3-NEXT: [[TMP43:%.*]] = bitcast float* [[REF_TMP16]] to i8* 2807 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP43]]) #[[ATTR5]] 2808 // CHECK3-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] 2809 // CHECK3-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP44]] to float 2810 // CHECK3-NEXT: store float [[CONV17]], float* [[REF_TMP16]], align 4, !tbaa [[TBAA14]] 2811 // CHECK3-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]], float* nonnull align 4 dereferenceable(4) [[REF_TMP15]], float* nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR10]] 2812 // CHECK3-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 
dereferenceable(8) [[PARTIAL_SUM5]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR10]] 2813 // CHECK3-NEXT: [[TMP45:%.*]] = bitcast float* [[REF_TMP16]] to i8* 2814 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP45]]) #[[ATTR5]] 2815 // CHECK3-NEXT: [[TMP46:%.*]] = bitcast float* [[REF_TMP15]] to i8* 2816 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP46]]) #[[ATTR5]] 2817 // CHECK3-NEXT: [[TMP47:%.*]] = bitcast %"class.std::complex"* [[REF_TMP14]] to i8* 2818 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP47]]) #[[ATTR5]] 2819 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 2820 // CHECK3: omp.body.continue: 2821 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 2822 // CHECK3: omp.inner.for.inc: 2823 // CHECK3-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 2824 // CHECK3-NEXT: [[ADD18:%.*]] = add i32 [[TMP48]], 1 2825 // CHECK3-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 2826 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] 2827 // CHECK3: omp.inner.for.end: 2828 // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] 2829 // CHECK3: omp.dispatch.inc: 2830 // CHECK3-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 2831 // CHECK3-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] 2832 // CHECK3-NEXT: [[ADD19:%.*]] = add i32 [[TMP49]], [[TMP50]] 2833 // CHECK3-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 2834 // CHECK3-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 2835 // CHECK3-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] 2836 // CHECK3-NEXT: [[ADD20:%.*]] = add i32 [[TMP51]], [[TMP52]] 2837 // CHECK3-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 2838 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] 2839 // CHECK3: omp.dispatch.end: 2840 // CHECK3-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 2841 // CHECK3-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4, !tbaa [[TBAA8]] 2842 // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP54]]) 2843 // CHECK3-NEXT: [[TMP55:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 2844 // CHECK3-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4, !tbaa [[TBAA8]] 2845 // CHECK3-NEXT: [[TMP57:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 2846 // CHECK3-NEXT: [[TMP58:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM5]] to i8* 2847 // CHECK3-NEXT: store i8* [[TMP58]], i8** [[TMP57]], align 8 2848 // CHECK3-NEXT: [[TMP59:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* 2849 // CHECK3-NEXT: [[TMP60:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP56]], i32 1, i64 8, i8* [[TMP59]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func) 2850 // CHECK3-NEXT: [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1 2851 // CHECK3-NEXT: br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] 2852 // CHECK3: .omp.reduction.then: 2853 // CHECK3-NEXT: [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]], %"class.std::complex"* nonnull align 4 
dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR10]] 2854 // CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]]) 2855 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] 2856 // CHECK3: .omp.reduction.done: 2857 // CHECK3-NEXT: [[TMP62:%.*]] = bitcast i32* [[I7]] to i8* 2858 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR5]] 2859 // CHECK3-NEXT: [[TMP63:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM5]] to i8* 2860 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP63]]) #[[ATTR5]] 2861 // CHECK3-NEXT: [[TMP64:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* 2862 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR5]] 2863 // CHECK3-NEXT: [[TMP65:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* 2864 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR5]] 2865 // CHECK3-NEXT: [[TMP66:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* 2866 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR5]] 2867 // CHECK3-NEXT: [[TMP67:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* 2868 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR5]] 2869 // CHECK3-NEXT: br label [[OMP_PRECOND_END]] 2870 // CHECK3: omp.precond.end: 2871 // CHECK3-NEXT: [[TMP68:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* 2872 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR5]] 2873 // CHECK3-NEXT: [[TMP69:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* 2874 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR5]] 2875 // CHECK3-NEXT: [[TMP70:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* 2876 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR5]] 2877 // CHECK3-NEXT: [[TMP71:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* 2878 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR5]] 2879 // CHECK3-NEXT: ret void 2880 // 2881 // 2882 // CHECK3-LABEL: define {{[^@]+}}@_ZNSt7complexIfEpLIfEERS0_RKS_IT_E 2883 // CHECK3-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[__C:%.*]]) #[[ATTR4:[0-9]+]] comdat align 2 { 2884 // CHECK3-NEXT: entry: 2885 // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 2886 // CHECK3-NEXT: [[__C_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 2887 // CHECK3-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] 2888 // CHECK3-NEXT: store %"class.std::complex"* [[__C]], %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] 2889 // CHECK3-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 2890 // CHECK3-NEXT: [[TMP0:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] 2891 // CHECK3-NEXT: [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP0]]) #[[ATTR10]] 2892 // CHECK3-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0 2893 // CHECK3-NEXT: [[TMP1:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA16:![0-9]+]] 2894 // CHECK3-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[CALL]] 2895 // CHECK3-NEXT: store float [[ADD]], float* [[__RE_]], align 4, !tbaa [[TBAA16]] 2896 // CHECK3-NEXT: [[TMP2:%.*]] = load %"class.std::complex"*, 
%"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] 2897 // CHECK3-NEXT: [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]]) #[[ATTR10]] 2898 // CHECK3-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1 2899 // CHECK3-NEXT: [[TMP3:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA18:![0-9]+]] 2900 // CHECK3-NEXT: [[ADD3:%.*]] = fadd float [[TMP3]], [[CALL2]] 2901 // CHECK3-NEXT: store float [[ADD3]], float* [[__IM_]], align 4, !tbaa [[TBAA18]] 2902 // CHECK3-NEXT: ret %"class.std::complex"* [[THIS1]] 2903 // 2904 // 2905 // CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func 2906 // CHECK3-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR6:[0-9]+]] { 2907 // CHECK3-NEXT: entry: 2908 // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 2909 // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 2910 // CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 2911 // CHECK3-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 2912 // CHECK3-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x i8*], align 8 2913 // CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca %"class.std::complex", align 4 2914 // CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] 2915 // CHECK3-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2, !tbaa [[TBAA19:![0-9]+]] 2916 // CHECK3-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2, !tbaa [[TBAA19]] 2917 // CHECK3-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2, !tbaa [[TBAA19]] 2918 // CHECK3-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] 2919 // CHECK3-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]* 2920 // CHECK3-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2, !tbaa [[TBAA19]] 2921 // CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2, !tbaa [[TBAA19]] 2922 // CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2, !tbaa [[TBAA19]] 2923 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 2924 // CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 2925 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 2926 // CHECK3-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to %"class.std::complex"* 2927 // CHECK3-NEXT: [[TMP13:%.*]] = getelementptr %"class.std::complex", %"class.std::complex"* [[TMP12]], i64 1 2928 // CHECK3-NEXT: [[TMP14:%.*]] = bitcast %"class.std::complex"* [[TMP13]] to i8* 2929 // CHECK3-NEXT: [[TMP15:%.*]] = bitcast %"class.std::complex"* [[TMP12]] to i64* 2930 // CHECK3-NEXT: [[TMP16:%.*]] = bitcast %"class.std::complex"* [[DOTOMP_REDUCTION_ELEMENT]] to i64* 2931 // CHECK3-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP15]], align 4 2932 // CHECK3-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_get_warp_size() 2933 // CHECK3-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16 2934 // CHECK3-NEXT: [[TMP20:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP19]]) 2935 // CHECK3-NEXT: store i64 [[TMP20]], i64* [[TMP16]], align 4 2936 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr i64, i64* [[TMP15]], i64 1 2937 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr i64, i64* [[TMP16]], i64 1 2938 // CHECK3-NEXT: [[TMP23:%.*]] = bitcast %"class.std::complex"* [[DOTOMP_REDUCTION_ELEMENT]] to 
i8* 2939 // CHECK3-NEXT: store i8* [[TMP23]], i8** [[TMP11]], align 8, !tbaa [[TBAA12]] 2940 // CHECK3-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 0 2941 // CHECK3-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP8]], 1 2942 // CHECK3-NEXT: [[TMP26:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] 2943 // CHECK3-NEXT: [[TMP27:%.*]] = and i1 [[TMP25]], [[TMP26]] 2944 // CHECK3-NEXT: [[TMP28:%.*]] = icmp eq i16 [[TMP8]], 2 2945 // CHECK3-NEXT: [[TMP29:%.*]] = and i16 [[TMP6]], 1 2946 // CHECK3-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP29]], 0 2947 // CHECK3-NEXT: [[TMP31:%.*]] = and i1 [[TMP28]], [[TMP30]] 2948 // CHECK3-NEXT: [[TMP32:%.*]] = icmp sgt i16 [[TMP7]], 0 2949 // CHECK3-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] 2950 // CHECK3-NEXT: [[TMP34:%.*]] = or i1 [[TMP24]], [[TMP27]] 2951 // CHECK3-NEXT: [[TMP35:%.*]] = or i1 [[TMP34]], [[TMP33]] 2952 // CHECK3-NEXT: br i1 [[TMP35]], label [[THEN:%.*]], label [[ELSE:%.*]] 2953 // CHECK3: then: 2954 // CHECK3-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* 2955 // CHECK3-NEXT: [[TMP37:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* 2956 // CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP36]], i8* [[TMP37]]) #[[ATTR5]] 2957 // CHECK3-NEXT: br label [[IFCONT:%.*]] 2958 // CHECK3: else: 2959 // CHECK3-NEXT: br label [[IFCONT]] 2960 // CHECK3: ifcont: 2961 // CHECK3-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 2962 // CHECK3-NEXT: [[TMP39:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] 2963 // CHECK3-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] 2964 // CHECK3-NEXT: br i1 [[TMP40]], label [[THEN4:%.*]], label [[ELSE5:%.*]] 2965 // CHECK3: then4: 2966 // CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 2967 // CHECK3-NEXT: [[TMP42:%.*]] = load i8*, i8** [[TMP41]], align 8 2968 // CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 2969 // CHECK3-NEXT: [[TMP44:%.*]] = load i8*, i8** [[TMP43]], align 8 2970 // CHECK3-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP42]] to %"class.std::complex"* 2971 // CHECK3-NEXT: [[TMP46:%.*]] = bitcast i8* [[TMP44]] to %"class.std::complex"* 2972 // CHECK3-NEXT: [[TMP47:%.*]] = bitcast %"class.std::complex"* [[TMP46]] to i8* 2973 // CHECK3-NEXT: [[TMP48:%.*]] = bitcast %"class.std::complex"* [[TMP45]] to i8* 2974 // CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP47]], i8* align 4 [[TMP48]], i64 8, i1 false), !tbaa.struct !21 2975 // CHECK3-NEXT: br label [[IFCONT6:%.*]] 2976 // CHECK3: else5: 2977 // CHECK3-NEXT: br label [[IFCONT6]] 2978 // CHECK3: ifcont6: 2979 // CHECK3-NEXT: ret void 2980 // 2981 // 2982 // CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func 2983 // CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR6]] { 2984 // CHECK3-NEXT: entry: 2985 // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 2986 // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 2987 // CHECK3-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4 2988 // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 2989 // CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] 2990 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] 2991 // CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 2992 // CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 2993 // CHECK3-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 
[[TMP4]], 31 2994 // CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 2995 // CHECK3-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5 2996 // CHECK3-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8 2997 // CHECK3-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to [1 x i8*]* 2998 // CHECK3-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] 2999 // CHECK3-NEXT: br label [[PRECOND:%.*]] 3000 // CHECK3: precond: 3001 // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] 3002 // CHECK3-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 2 3003 // CHECK3-NEXT: br i1 [[TMP9]], label [[BODY:%.*]], label [[EXIT:%.*]] 3004 // CHECK3: body: 3005 // CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP2]]) 3006 // CHECK3-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 3007 // CHECK3-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] 3008 // CHECK3: then: 3009 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i64 0, i64 0 3010 // CHECK3-NEXT: [[TMP11:%.*]] = load i8*, i8** [[TMP10]], align 8, !tbaa [[TBAA12]] 3011 // CHECK3-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP11]] to i32* 3012 // CHECK3-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 [[TMP8]] 3013 // CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] 3014 // CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]], align 4 3015 // CHECK3-NEXT: store volatile i32 [[TMP15]], i32 addrspace(3)* [[TMP14]], align 4 3016 // CHECK3-NEXT: br label [[IFCONT:%.*]] 3017 // CHECK3: else: 3018 // CHECK3-NEXT: br label [[IFCONT]] 3019 // CHECK3: ifcont: 3020 // CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) 3021 // CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] 3022 // CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP16]] 3023 // CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] 3024 // CHECK3: then2: 3025 // CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] 3026 // CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i64 0, i64 0 3027 // CHECK3-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8, !tbaa [[TBAA12]] 3028 // CHECK3-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to i32* 3029 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[TMP20]], i32 [[TMP8]] 3030 // CHECK3-NEXT: [[TMP22:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP17]], align 4, !tbaa [[TBAA8]] 3031 // CHECK3-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 4, !tbaa [[TBAA8]] 3032 // CHECK3-NEXT: br label [[IFCONT4:%.*]] 3033 // CHECK3: else3: 3034 // CHECK3-NEXT: br label [[IFCONT4]] 3035 // CHECK3: ifcont4: 3036 // CHECK3-NEXT: [[TMP23:%.*]] = add nsw i32 [[TMP8]], 1 3037 // CHECK3-NEXT: store i32 [[TMP23]], i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] 3038 // CHECK3-NEXT: br label [[PRECOND]] 3039 // CHECK3: exit: 3040 // CHECK3-NEXT: ret void 3041 // 3042 // 3043 // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper 3044 // CHECK3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR6]] { 3045 // CHECK3-NEXT: entry: 3046 // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 3047 // CHECK3-NEXT: [[DOTADDR1:%.*]] = 
alloca i32, align 4 3048 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3049 // CHECK3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 3050 // CHECK3-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA19]] 3051 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] 3052 // CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 3053 // CHECK3-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 3054 // CHECK3-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 3055 // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 3056 // CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** 3057 // CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8, !tbaa [[TBAA12]] 3058 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 1 3059 // CHECK3-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32** 3060 // CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8, !tbaa [[TBAA12]] 3061 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 2 3062 // CHECK3-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to %"class.std::complex"** 3063 // CHECK3-NEXT: [[TMP11:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[TMP10]], align 8, !tbaa [[TBAA12]] 3064 // CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]], %"class.std::complex"* [[TMP11]]) #[[ATTR5]] 3065 // CHECK3-NEXT: ret void 3066 // 3067 // 3068 // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16 3069 // CHECK3-SAME: () #[[ATTR0]] { 3070 // CHECK3-NEXT: entry: 3071 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3072 // CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 3073 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) 3074 // CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 3075 // CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 3076 // CHECK3: user_code.entry: 3077 // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 3078 // CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 3079 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8]] 3080 // CHECK3-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR5]] 3081 // CHECK3-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 3082 // CHECK3-NEXT: ret void 3083 // CHECK3: worker.exit: 3084 // CHECK3-NEXT: ret void 3085 // 3086 // 3087 // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__2 3088 // CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { 3089 // CHECK3-NEXT: entry: 3090 // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 3091 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 3092 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 3093 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 3094 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 3095 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 3096 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 3097 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 3098 // CHECK3-NEXT: [[IB:%.*]] = alloca i32, align 4 3099 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca 
double, align 8 3100 // CHECK3-NEXT: [[REF_TMP2:%.*]] = alloca double, align 8 3101 // CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 3102 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12]] 3103 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] 3104 // CHECK3-NEXT: [[ISTART:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 4) 3105 // CHECK3-NEXT: [[ISTART_ON_STACK:%.*]] = bitcast i8* [[ISTART]] to i32* 3106 // CHECK3-NEXT: [[IEND:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 4) 3107 // CHECK3-NEXT: [[IEND_ON_STACK:%.*]] = bitcast i8* [[IEND]] to i32* 3108 // CHECK3-NEXT: [[PARTIAL_SUM:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 16) 3109 // CHECK3-NEXT: [[PARTIAL_SUM_ON_STACK:%.*]] = bitcast i8* [[PARTIAL_SUM]] to %"class.std::complex.0"* 3110 // CHECK3-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* 3111 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) #[[ATTR5]] 3112 // CHECK3-NEXT: [[TMP1:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* 3113 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP1]]) #[[ATTR5]] 3114 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 3115 // CHECK3-NEXT: [[TMP2:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* 3116 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP2]]) #[[ATTR5]] 3117 // CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 3118 // CHECK3-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* 3119 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR5]] 3120 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] 3121 // CHECK3-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* 3122 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR5]] 3123 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] 3124 // CHECK3-NEXT: [[TMP5:%.*]] = bitcast i32* [[IB]] to i8* 3125 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP5]]) #[[ATTR5]] 3126 // CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 3127 // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA8]] 3128 // CHECK3-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 3129 // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 3130 // CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 99 3131 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 3132 // CHECK3: cond.true: 3133 // CHECK3-NEXT: br label [[COND_END:%.*]] 3134 // CHECK3: cond.false: 3135 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 3136 // CHECK3-NEXT: br label [[COND_END]] 3137 // CHECK3: cond.end: 3138 // CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] 3139 // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 3140 // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 3141 // CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 3142 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 3143 // CHECK3: omp.inner.for.cond: 3144 // CHECK3-NEXT: [[TMP11:%.*]] = 
load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 3145 // CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 3146 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] 3147 // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] 3148 // CHECK3: omp.inner.for.cond.cleanup: 3149 // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] 3150 // CHECK3: omp.inner.for.body: 3151 // CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 3152 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 3153 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 3154 // CHECK3-NEXT: store i32 [[ADD]], i32* [[IB]], align 4, !tbaa [[TBAA8]] 3155 // CHECK3-NEXT: [[TMP14:%.*]] = bitcast double* [[REF_TMP]] to i8* 3156 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP14]]) #[[ATTR5]] 3157 // CHECK3-NEXT: store double 0.000000e+00, double* [[REF_TMP]], align 8, !tbaa [[TBAA22:![0-9]+]] 3158 // CHECK3-NEXT: [[TMP15:%.*]] = bitcast double* [[REF_TMP2]] to i8* 3159 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP15]]) #[[ATTR5]] 3160 // CHECK3-NEXT: store double 0.000000e+00, double* [[REF_TMP2]], align 8, !tbaa [[TBAA22]] 3161 // CHECK3-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM_ON_STACK]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR10]] 3162 // CHECK3-NEXT: [[TMP16:%.*]] = bitcast double* [[REF_TMP2]] to i8* 3163 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP16]]) #[[ATTR5]] 3164 // CHECK3-NEXT: [[TMP17:%.*]] = bitcast double* [[REF_TMP]] to i8* 3165 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP17]]) #[[ATTR5]] 3166 // CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] 3167 // CHECK3-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP18]], 4 3168 // CHECK3-NEXT: store i32 [[MUL3]], i32* [[ISTART_ON_STACK]], align 4, !tbaa [[TBAA8]] 3169 // CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] 3170 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 3171 // CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[ADD4]], 4 3172 // CHECK3-NEXT: store i32 [[MUL5]], i32* [[IEND_ON_STACK]], align 4, !tbaa [[TBAA8]] 3173 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 3174 // CHECK3-NEXT: [[TMP21:%.*]] = bitcast i32* [[ISTART_ON_STACK]] to i8* 3175 // CHECK3-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8, !tbaa [[TBAA12]] 3176 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 3177 // CHECK3-NEXT: [[TMP23:%.*]] = bitcast i32* [[IEND_ON_STACK]] to i8* 3178 // CHECK3-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8, !tbaa [[TBAA12]] 3179 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 3180 // CHECK3-NEXT: [[TMP25:%.*]] = bitcast %"class.std::complex.0"* [[PARTIAL_SUM_ON_STACK]] to i8* 3181 // CHECK3-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8, !tbaa [[TBAA12]] 3182 // CHECK3-NEXT: [[TMP26:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 3183 // CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i32*, %"class.std::complex.0"*)* 
@__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP26]], i64 3) 3184 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 3185 // CHECK3: omp.body.continue: 3186 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 3187 // CHECK3: omp.inner.for.inc: 3188 // CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 3189 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP27]], 1 3190 // CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 3191 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] 3192 // CHECK3: omp.inner.for.end: 3193 // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 3194 // CHECK3: omp.loop.exit: 3195 // CHECK3-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]]) 3196 // CHECK3-NEXT: [[TMP28:%.*]] = bitcast i32* [[IB]] to i8* 3197 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP28]]) #[[ATTR5]] 3198 // CHECK3-NEXT: [[TMP29:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* 3199 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP29]]) #[[ATTR5]] 3200 // CHECK3-NEXT: [[TMP30:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* 3201 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP30]]) #[[ATTR5]] 3202 // CHECK3-NEXT: [[TMP31:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* 3203 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP31]]) #[[ATTR5]] 3204 // CHECK3-NEXT: [[TMP32:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* 3205 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP32]]) #[[ATTR5]] 3206 // CHECK3-NEXT: [[TMP33:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* 3207 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR5]] 3208 // CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[PARTIAL_SUM]], i64 16) 3209 // CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[IEND]], i64 4) 3210 // CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[ISTART]], i64 4) 3211 // CHECK3-NEXT: ret void 3212 // 3213 // 3214 // CHECK3-LABEL: define {{[^@]+}}@_ZNSt7complexIdEC1ERKdS2_ 3215 // CHECK3-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]], double* nonnull align 8 dereferenceable(8) [[__RE:%.*]], double* nonnull align 8 dereferenceable(8) [[__IM:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 { 3216 // CHECK3-NEXT: entry: 3217 // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 3218 // CHECK3-NEXT: [[__RE_ADDR:%.*]] = alloca double*, align 8 3219 // CHECK3-NEXT: [[__IM_ADDR:%.*]] = alloca double*, align 8 3220 // CHECK3-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] 3221 // CHECK3-NEXT: store double* [[__RE]], double** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] 3222 // CHECK3-NEXT: store double* [[__IM]], double** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] 3223 // CHECK3-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 3224 // CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[__RE_ADDR]], align 8 3225 // CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[__IM_ADDR]], align 8 3226 // CHECK3-NEXT: call void @_ZNSt7complexIdEC2ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS1]], double* nonnull align 8 dereferenceable(8) [[TMP0]], double* nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR10]] 3227 // CHECK3-NEXT: ret void 3228 // 3229 // 3230 // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__3 3231 
// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ISTART:%.*]], i32* nonnull align 4 dereferenceable(4) [[IEND:%.*]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM:%.*]]) #[[ATTR0]] { 3232 // CHECK3-NEXT: entry: 3233 // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 3234 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 3235 // CHECK3-NEXT: [[ISTART_ADDR:%.*]] = alloca i32*, align 8 3236 // CHECK3-NEXT: [[IEND_ADDR:%.*]] = alloca i32*, align 8 3237 // CHECK3-NEXT: [[PARTIAL_SUM_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 3238 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 3239 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 3240 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 3241 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 3242 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 3243 // CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 3244 // CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 3245 // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 3246 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 3247 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 3248 // CHECK3-NEXT: [[PARTIAL_SUM5:%.*]] = alloca %"class.std::complex.0", align 8 3249 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca double, align 8 3250 // CHECK3-NEXT: [[REF_TMP6:%.*]] = alloca double, align 8 3251 // CHECK3-NEXT: [[I7:%.*]] = alloca i32, align 4 3252 // CHECK3-NEXT: [[REF_TMP14:%.*]] = alloca %"class.std::complex.0", align 8 3253 // CHECK3-NEXT: [[REF_TMP15:%.*]] = alloca double, align 8 3254 // CHECK3-NEXT: [[REF_TMP16:%.*]] = alloca double, align 8 3255 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 3256 // CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12]] 3257 // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] 3258 // CHECK3-NEXT: store i32* [[ISTART]], i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] 3259 // CHECK3-NEXT: store i32* [[IEND]], i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] 3260 // CHECK3-NEXT: store %"class.std::complex.0"* [[PARTIAL_SUM]], %"class.std::complex.0"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] 3261 // CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] 3262 // CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] 3263 // CHECK3-NEXT: [[TMP2:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] 3264 // CHECK3-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* 3265 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR5]] 3266 // CHECK3-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* 3267 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR5]] 3268 // CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4, !tbaa [[TBAA8]] 3269 // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] 3270 // CHECK3-NEXT: [[TMP6:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* 3271 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR5]] 3272 // CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA8]] 3273 // CHECK3-NEXT: store i32 [[TMP7]], i32* 
[[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] 3274 // CHECK3-NEXT: [[TMP8:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* 3275 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR5]] 3276 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] 3277 // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] 3278 // CHECK3-NEXT: [[SUB:%.*]] = sub i32 [[TMP9]], [[TMP10]] 3279 // CHECK3-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 3280 // CHECK3-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 3281 // CHECK3-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 3282 // CHECK3-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 3283 // CHECK3-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] 3284 // CHECK3-NEXT: [[TMP11:%.*]] = bitcast i32* [[I]] to i8* 3285 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP11]]) #[[ATTR5]] 3286 // CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] 3287 // CHECK3-NEXT: store i32 [[TMP12]], i32* [[I]], align 4, !tbaa [[TBAA8]] 3288 // CHECK3-NEXT: [[TMP13:%.*]] = bitcast i32* [[I]] to i8* 3289 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP13]]) #[[ATTR5]] 3290 // CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] 3291 // CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] 3292 // CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP14]], [[TMP15]] 3293 // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 3294 // CHECK3: omp.precond.then: 3295 // CHECK3-NEXT: [[TMP16:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* 3296 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR5]] 3297 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 3298 // CHECK3-NEXT: [[TMP17:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* 3299 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR5]] 3300 // CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] 3301 // CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 3302 // CHECK3-NEXT: [[TMP19:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* 3303 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP19]]) #[[ATTR5]] 3304 // CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] 3305 // CHECK3-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* 3306 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR5]] 3307 // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] 3308 // CHECK3-NEXT: [[TMP21:%.*]] = bitcast %"class.std::complex.0"* [[PARTIAL_SUM5]] to i8* 3309 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP21]]) #[[ATTR5]] 3310 // CHECK3-NEXT: [[TMP22:%.*]] = bitcast double* [[REF_TMP]] to i8* 3311 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP22]]) #[[ATTR5]] 3312 // CHECK3-NEXT: store double 0.000000e+00, double* [[REF_TMP]], align 8, !tbaa [[TBAA22]] 3313 // CHECK3-NEXT: [[TMP23:%.*]] = bitcast double* [[REF_TMP6]] to i8* 3314 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP23]]) #[[ATTR5]] 3315 // CHECK3-NEXT: store double 0.000000e+00, double* [[REF_TMP6]], align 8, !tbaa [[TBAA22]] 3316 // CHECK3-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull 
align 8 dereferenceable(16) [[PARTIAL_SUM5]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR10]] 3317 // CHECK3-NEXT: [[TMP24:%.*]] = bitcast double* [[REF_TMP6]] to i8* 3318 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP24]]) #[[ATTR5]] 3319 // CHECK3-NEXT: [[TMP25:%.*]] = bitcast double* [[REF_TMP]] to i8* 3320 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP25]]) #[[ATTR5]] 3321 // CHECK3-NEXT: [[TMP26:%.*]] = bitcast i32* [[I7]] to i8* 3322 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP26]]) #[[ATTR5]] 3323 // CHECK3-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 3324 // CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4, !tbaa [[TBAA8]] 3325 // CHECK3-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB3]], i32 [[TMP28]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 3326 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] 3327 // CHECK3: omp.dispatch.cond: 3328 // CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 3329 // CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] 3330 // CHECK3-NEXT: [[CMP8:%.*]] = icmp ugt i32 [[TMP29]], [[TMP30]] 3331 // CHECK3-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 3332 // CHECK3: cond.true: 3333 // CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] 3334 // CHECK3-NEXT: br label [[COND_END:%.*]] 3335 // CHECK3: cond.false: 3336 // CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 3337 // CHECK3-NEXT: br label [[COND_END]] 3338 // CHECK3: cond.end: 3339 // CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE]] ], [ [[TMP32]], [[COND_FALSE]] ] 3340 // CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 3341 // CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 3342 // CHECK3-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 3343 // CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 3344 // CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 3345 // CHECK3-NEXT: [[ADD9:%.*]] = add i32 [[TMP35]], 1 3346 // CHECK3-NEXT: [[CMP10:%.*]] = icmp ult i32 [[TMP34]], [[ADD9]] 3347 // CHECK3-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] 3348 // CHECK3: omp.dispatch.cleanup: 3349 // CHECK3-NEXT: br label [[OMP_DISPATCH_END:%.*]] 3350 // CHECK3: omp.dispatch.body: 3351 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 3352 // CHECK3: omp.inner.for.cond: 3353 // CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 3354 // CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 3355 // CHECK3-NEXT: [[ADD11:%.*]] = add i32 [[TMP37]], 1 3356 // CHECK3-NEXT: [[CMP12:%.*]] = icmp ult i32 [[TMP36]], [[ADD11]] 3357 // CHECK3-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] 3358 // CHECK3: omp.inner.for.cond.cleanup: 3359 // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] 3360 // CHECK3: omp.inner.for.body: 3361 // CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] 3362 // CHECK3-NEXT: [[TMP39:%.*]] = load i32, 
i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 3363 // CHECK3-NEXT: [[MUL:%.*]] = mul i32 [[TMP39]], 1 3364 // CHECK3-NEXT: [[ADD13:%.*]] = add i32 [[TMP38]], [[MUL]] 3365 // CHECK3-NEXT: store i32 [[ADD13]], i32* [[I7]], align 4, !tbaa [[TBAA8]] 3366 // CHECK3-NEXT: [[TMP40:%.*]] = bitcast %"class.std::complex.0"* [[REF_TMP14]] to i8* 3367 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP40]]) #[[ATTR5]] 3368 // CHECK3-NEXT: [[TMP41:%.*]] = bitcast double* [[REF_TMP15]] to i8* 3369 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP41]]) #[[ATTR5]] 3370 // CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] 3371 // CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP42]] to double 3372 // CHECK3-NEXT: store double [[CONV]], double* [[REF_TMP15]], align 8, !tbaa [[TBAA22]] 3373 // CHECK3-NEXT: [[TMP43:%.*]] = bitcast double* [[REF_TMP16]] to i8* 3374 // CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP43]]) #[[ATTR5]] 3375 // CHECK3-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] 3376 // CHECK3-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP44]] to double 3377 // CHECK3-NEXT: store double [[CONV17]], double* [[REF_TMP16]], align 8, !tbaa [[TBAA22]] 3378 // CHECK3-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]], double* nonnull align 8 dereferenceable(8) [[REF_TMP15]], double* nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR10]] 3379 // CHECK3-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR10]] 3380 // CHECK3-NEXT: [[TMP45:%.*]] = bitcast double* [[REF_TMP16]] to i8* 3381 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP45]]) #[[ATTR5]] 3382 // CHECK3-NEXT: [[TMP46:%.*]] = bitcast double* [[REF_TMP15]] to i8* 3383 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP46]]) #[[ATTR5]] 3384 // CHECK3-NEXT: [[TMP47:%.*]] = bitcast %"class.std::complex.0"* [[REF_TMP14]] to i8* 3385 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP47]]) #[[ATTR5]] 3386 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 3387 // CHECK3: omp.body.continue: 3388 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 3389 // CHECK3: omp.inner.for.inc: 3390 // CHECK3-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 3391 // CHECK3-NEXT: [[ADD18:%.*]] = add i32 [[TMP48]], 1 3392 // CHECK3-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 3393 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] 3394 // CHECK3: omp.inner.for.end: 3395 // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] 3396 // CHECK3: omp.dispatch.inc: 3397 // CHECK3-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 3398 // CHECK3-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] 3399 // CHECK3-NEXT: [[ADD19:%.*]] = add i32 [[TMP49]], [[TMP50]] 3400 // CHECK3-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 3401 // CHECK3-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 3402 // CHECK3-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] 3403 // CHECK3-NEXT: [[ADD20:%.*]] = add i32 [[TMP51]], [[TMP52]] 3404 // CHECK3-NEXT: store i32 [[ADD20]], 
i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 3405 // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] 3406 // CHECK3: omp.dispatch.end: 3407 // CHECK3-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 3408 // CHECK3-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4, !tbaa [[TBAA8]] 3409 // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP54]]) 3410 // CHECK3-NEXT: [[TMP55:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 3411 // CHECK3-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4, !tbaa [[TBAA8]] 3412 // CHECK3-NEXT: [[TMP57:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 3413 // CHECK3-NEXT: [[TMP58:%.*]] = bitcast %"class.std::complex.0"* [[PARTIAL_SUM5]] to i8* 3414 // CHECK3-NEXT: store i8* [[TMP58]], i8** [[TMP57]], align 8 3415 // CHECK3-NEXT: [[TMP59:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* 3416 // CHECK3-NEXT: [[TMP60:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP56]], i32 1, i64 8, i8* [[TMP59]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func5, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func6) 3417 // CHECK3-NEXT: [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1 3418 // CHECK3-NEXT: br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] 3419 // CHECK3: .omp.reduction.then: 3420 // CHECK3-NEXT: [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR10]] 3421 // CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]]) 3422 // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] 3423 // CHECK3: .omp.reduction.done: 3424 // CHECK3-NEXT: [[TMP62:%.*]] = bitcast i32* [[I7]] to i8* 3425 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR5]] 3426 // CHECK3-NEXT: [[TMP63:%.*]] = bitcast %"class.std::complex.0"* [[PARTIAL_SUM5]] to i8* 3427 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP63]]) #[[ATTR5]] 3428 // CHECK3-NEXT: [[TMP64:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* 3429 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR5]] 3430 // CHECK3-NEXT: [[TMP65:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* 3431 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR5]] 3432 // CHECK3-NEXT: [[TMP66:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* 3433 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR5]] 3434 // CHECK3-NEXT: [[TMP67:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* 3435 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR5]] 3436 // CHECK3-NEXT: br label [[OMP_PRECOND_END]] 3437 // CHECK3: omp.precond.end: 3438 // CHECK3-NEXT: [[TMP68:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* 3439 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR5]] 3440 // CHECK3-NEXT: [[TMP69:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* 3441 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR5]] 3442 // CHECK3-NEXT: [[TMP70:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* 3443 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR5]] 3444 // CHECK3-NEXT: [[TMP71:%.*]] = bitcast i32* [[DOTOMP_IV]] 
to i8* 3445 // CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR5]] 3446 // CHECK3-NEXT: ret void 3447 // 3448 // 3449 // CHECK3-LABEL: define {{[^@]+}}@_ZNSt7complexIdEpLIdEERS0_RKS_IT_E 3450 // CHECK3-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[__C:%.*]]) #[[ATTR4]] comdat align 2 { 3451 // CHECK3-NEXT: entry: 3452 // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 3453 // CHECK3-NEXT: [[__C_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 3454 // CHECK3-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] 3455 // CHECK3-NEXT: store %"class.std::complex.0"* [[__C]], %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] 3456 // CHECK3-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 3457 // CHECK3-NEXT: [[TMP0:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] 3458 // CHECK3-NEXT: [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP0]]) #[[ATTR10]] 3459 // CHECK3-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 0 3460 // CHECK3-NEXT: [[TMP1:%.*]] = load double, double* [[__RE_]], align 8, !tbaa [[TBAA24:![0-9]+]] 3461 // CHECK3-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[CALL]] 3462 // CHECK3-NEXT: store double [[ADD]], double* [[__RE_]], align 8, !tbaa [[TBAA24]] 3463 // CHECK3-NEXT: [[TMP2:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] 3464 // CHECK3-NEXT: [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]]) #[[ATTR10]] 3465 // CHECK3-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 1 3466 // CHECK3-NEXT: [[TMP3:%.*]] = load double, double* [[__IM_]], align 8, !tbaa [[TBAA26:![0-9]+]] 3467 // CHECK3-NEXT: [[ADD3:%.*]] = fadd double [[TMP3]], [[CALL2]] 3468 // CHECK3-NEXT: store double [[ADD3]], double* [[__IM_]], align 8, !tbaa [[TBAA26]] 3469 // CHECK3-NEXT: ret %"class.std::complex.0"* [[THIS1]] 3470 // 3471 // 3472 // CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func5 3473 // CHECK3-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR6]] { 3474 // CHECK3-NEXT: entry: 3475 // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 3476 // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 3477 // CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 3478 // CHECK3-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 3479 // CHECK3-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x i8*], align 8 3480 // CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca %"class.std::complex.0", align 8 3481 // CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] 3482 // CHECK3-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2, !tbaa [[TBAA19]] 3483 // CHECK3-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2, !tbaa [[TBAA19]] 3484 // CHECK3-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2, !tbaa [[TBAA19]] 3485 // CHECK3-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] 3486 // CHECK3-NEXT: [[TMP5:%.*]] = 
bitcast i8* [[TMP4]] to [1 x i8*]* 3487 // CHECK3-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2, !tbaa [[TBAA19]] 3488 // CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2, !tbaa [[TBAA19]] 3489 // CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2, !tbaa [[TBAA19]] 3490 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 3491 // CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 3492 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 3493 // CHECK3-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to %"class.std::complex.0"* 3494 // CHECK3-NEXT: [[TMP13:%.*]] = getelementptr %"class.std::complex.0", %"class.std::complex.0"* [[TMP12]], i64 1 3495 // CHECK3-NEXT: [[TMP14:%.*]] = bitcast %"class.std::complex.0"* [[TMP13]] to i8* 3496 // CHECK3-NEXT: [[TMP15:%.*]] = bitcast %"class.std::complex.0"* [[TMP12]] to i64* 3497 // CHECK3-NEXT: [[TMP16:%.*]] = bitcast %"class.std::complex.0"* [[DOTOMP_REDUCTION_ELEMENT]] to i64* 3498 // CHECK3-NEXT: br label [[DOTSHUFFLE_PRE_COND:%.*]] 3499 // CHECK3: .shuffle.pre_cond: 3500 // CHECK3-NEXT: [[TMP17:%.*]] = phi i64* [ [[TMP15]], [[ENTRY:%.*]] ], [ [[TMP29:%.*]], [[DOTSHUFFLE_THEN:%.*]] ] 3501 // CHECK3-NEXT: [[TMP18:%.*]] = phi i64* [ [[TMP16]], [[ENTRY]] ], [ [[TMP30:%.*]], [[DOTSHUFFLE_THEN]] ] 3502 // CHECK3-NEXT: [[TMP19:%.*]] = bitcast i64* [[TMP17]] to i8* 3503 // CHECK3-NEXT: [[TMP20:%.*]] = ptrtoint i8* [[TMP14]] to i64 3504 // CHECK3-NEXT: [[TMP21:%.*]] = ptrtoint i8* [[TMP19]] to i64 3505 // CHECK3-NEXT: [[TMP22:%.*]] = sub i64 [[TMP20]], [[TMP21]] 3506 // CHECK3-NEXT: [[TMP23:%.*]] = sdiv exact i64 [[TMP22]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) 3507 // CHECK3-NEXT: [[TMP24:%.*]] = icmp sgt i64 [[TMP23]], 7 3508 // CHECK3-NEXT: br i1 [[TMP24]], label [[DOTSHUFFLE_THEN]], label [[DOTSHUFFLE_EXIT:%.*]] 3509 // CHECK3: .shuffle.then: 3510 // CHECK3-NEXT: [[TMP25:%.*]] = load i64, i64* [[TMP17]], align 8 3511 // CHECK3-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_get_warp_size() 3512 // CHECK3-NEXT: [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16 3513 // CHECK3-NEXT: [[TMP28:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP25]], i16 [[TMP7]], i16 [[TMP27]]) 3514 // CHECK3-NEXT: store i64 [[TMP28]], i64* [[TMP18]], align 8 3515 // CHECK3-NEXT: [[TMP29]] = getelementptr i64, i64* [[TMP17]], i64 1 3516 // CHECK3-NEXT: [[TMP30]] = getelementptr i64, i64* [[TMP18]], i64 1 3517 // CHECK3-NEXT: br label [[DOTSHUFFLE_PRE_COND]] 3518 // CHECK3: .shuffle.exit: 3519 // CHECK3-NEXT: [[TMP31:%.*]] = bitcast %"class.std::complex.0"* [[DOTOMP_REDUCTION_ELEMENT]] to i8* 3520 // CHECK3-NEXT: store i8* [[TMP31]], i8** [[TMP11]], align 8, !tbaa [[TBAA12]] 3521 // CHECK3-NEXT: [[TMP32:%.*]] = icmp eq i16 [[TMP8]], 0 3522 // CHECK3-NEXT: [[TMP33:%.*]] = icmp eq i16 [[TMP8]], 1 3523 // CHECK3-NEXT: [[TMP34:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] 3524 // CHECK3-NEXT: [[TMP35:%.*]] = and i1 [[TMP33]], [[TMP34]] 3525 // CHECK3-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 2 3526 // CHECK3-NEXT: [[TMP37:%.*]] = and i16 [[TMP6]], 1 3527 // CHECK3-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP37]], 0 3528 // CHECK3-NEXT: [[TMP39:%.*]] = and i1 [[TMP36]], [[TMP38]] 3529 // CHECK3-NEXT: [[TMP40:%.*]] = icmp sgt i16 [[TMP7]], 0 3530 // CHECK3-NEXT: [[TMP41:%.*]] = and i1 [[TMP39]], [[TMP40]] 3531 // CHECK3-NEXT: [[TMP42:%.*]] = or i1 [[TMP32]], [[TMP35]] 3532 // CHECK3-NEXT: [[TMP43:%.*]] = or i1 
[[TMP42]], [[TMP41]] 3533 // CHECK3-NEXT: br i1 [[TMP43]], label [[THEN:%.*]], label [[ELSE:%.*]] 3534 // CHECK3: then: 3535 // CHECK3-NEXT: [[TMP44:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* 3536 // CHECK3-NEXT: [[TMP45:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* 3537 // CHECK3-NEXT: call void @"_omp$reduction$reduction_func4"(i8* [[TMP44]], i8* [[TMP45]]) #[[ATTR5]] 3538 // CHECK3-NEXT: br label [[IFCONT:%.*]] 3539 // CHECK3: else: 3540 // CHECK3-NEXT: br label [[IFCONT]] 3541 // CHECK3: ifcont: 3542 // CHECK3-NEXT: [[TMP46:%.*]] = icmp eq i16 [[TMP8]], 1 3543 // CHECK3-NEXT: [[TMP47:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] 3544 // CHECK3-NEXT: [[TMP48:%.*]] = and i1 [[TMP46]], [[TMP47]] 3545 // CHECK3-NEXT: br i1 [[TMP48]], label [[THEN4:%.*]], label [[ELSE5:%.*]] 3546 // CHECK3: then4: 3547 // CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 3548 // CHECK3-NEXT: [[TMP50:%.*]] = load i8*, i8** [[TMP49]], align 8 3549 // CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 3550 // CHECK3-NEXT: [[TMP52:%.*]] = load i8*, i8** [[TMP51]], align 8 3551 // CHECK3-NEXT: [[TMP53:%.*]] = bitcast i8* [[TMP50]] to %"class.std::complex.0"* 3552 // CHECK3-NEXT: [[TMP54:%.*]] = bitcast i8* [[TMP52]] to %"class.std::complex.0"* 3553 // CHECK3-NEXT: [[TMP55:%.*]] = bitcast %"class.std::complex.0"* [[TMP54]] to i8* 3554 // CHECK3-NEXT: [[TMP56:%.*]] = bitcast %"class.std::complex.0"* [[TMP53]] to i8* 3555 // CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP55]], i8* align 8 [[TMP56]], i64 16, i1 false), !tbaa.struct !27 3556 // CHECK3-NEXT: br label [[IFCONT6:%.*]] 3557 // CHECK3: else5: 3558 // CHECK3-NEXT: br label [[IFCONT6]] 3559 // CHECK3: ifcont6: 3560 // CHECK3-NEXT: ret void 3561 // 3562 // 3563 // CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func6 3564 // CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR6]] { 3565 // CHECK3-NEXT: entry: 3566 // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 3567 // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 3568 // CHECK3-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4 3569 // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 3570 // CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] 3571 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] 3572 // CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 3573 // CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 3574 // CHECK3-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31 3575 // CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 3576 // CHECK3-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5 3577 // CHECK3-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8 3578 // CHECK3-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to [1 x i8*]* 3579 // CHECK3-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] 3580 // CHECK3-NEXT: br label [[PRECOND:%.*]] 3581 // CHECK3: precond: 3582 // CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] 3583 // CHECK3-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 4 3584 // CHECK3-NEXT: br i1 [[TMP9]], label [[BODY:%.*]], label [[EXIT:%.*]] 3585 // CHECK3: body: 3586 // CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) 3587 // 
CHECK3-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 3588 // CHECK3-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] 3589 // CHECK3: then: 3590 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i64 0, i64 0 3591 // CHECK3-NEXT: [[TMP11:%.*]] = load i8*, i8** [[TMP10]], align 8, !tbaa [[TBAA12]] 3592 // CHECK3-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP11]] to i32* 3593 // CHECK3-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 [[TMP8]] 3594 // CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] 3595 // CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]], align 4 3596 // CHECK3-NEXT: store volatile i32 [[TMP15]], i32 addrspace(3)* [[TMP14]], align 4 3597 // CHECK3-NEXT: br label [[IFCONT:%.*]] 3598 // CHECK3: else: 3599 // CHECK3-NEXT: br label [[IFCONT]] 3600 // CHECK3: ifcont: 3601 // CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) 3602 // CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] 3603 // CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP16]] 3604 // CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] 3605 // CHECK3: then2: 3606 // CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] 3607 // CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i64 0, i64 0 3608 // CHECK3-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8, !tbaa [[TBAA12]] 3609 // CHECK3-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to i32* 3610 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[TMP20]], i32 [[TMP8]] 3611 // CHECK3-NEXT: [[TMP22:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP17]], align 4, !tbaa [[TBAA8]] 3612 // CHECK3-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 4, !tbaa [[TBAA8]] 3613 // CHECK3-NEXT: br label [[IFCONT4:%.*]] 3614 // CHECK3: else3: 3615 // CHECK3-NEXT: br label [[IFCONT4]] 3616 // CHECK3: ifcont4: 3617 // CHECK3-NEXT: [[TMP23:%.*]] = add nsw i32 [[TMP8]], 1 3618 // CHECK3-NEXT: store i32 [[TMP23]], i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] 3619 // CHECK3-NEXT: br label [[PRECOND]] 3620 // CHECK3: exit: 3621 // CHECK3-NEXT: ret void 3622 // 3623 // 3624 // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper 3625 // CHECK3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR6]] { 3626 // CHECK3-NEXT: entry: 3627 // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 3628 // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 3629 // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 3630 // CHECK3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 3631 // CHECK3-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA19]] 3632 // CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] 3633 // CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 3634 // CHECK3-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 3635 // CHECK3-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 3636 // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 3637 // CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** 3638 // CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8, !tbaa [[TBAA12]] 3639 // CHECK3-NEXT: [[TMP6:%.*]] = 
getelementptr inbounds i8*, i8** [[TMP2]], i64 1 3640 // CHECK3-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32** 3641 // CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8, !tbaa [[TBAA12]] 3642 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 2 3643 // CHECK3-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to %"class.std::complex.0"** 3644 // CHECK3-NEXT: [[TMP11:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[TMP10]], align 8, !tbaa [[TBAA12]] 3645 // CHECK3-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]], %"class.std::complex.0"* [[TMP11]]) #[[ATTR5]] 3646 // CHECK3-NEXT: ret void 3647 // 3648 // 3649 // CHECK3-LABEL: define {{[^@]+}}@_ZNSt7complexIfEC2ERKfS2_ 3650 // CHECK3-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]], float* nonnull align 4 dereferenceable(4) [[__RE:%.*]], float* nonnull align 4 dereferenceable(4) [[__IM:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 { 3651 // CHECK3-NEXT: entry: 3652 // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 3653 // CHECK3-NEXT: [[__RE_ADDR:%.*]] = alloca float*, align 8 3654 // CHECK3-NEXT: [[__IM_ADDR:%.*]] = alloca float*, align 8 3655 // CHECK3-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] 3656 // CHECK3-NEXT: store float* [[__RE]], float** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] 3657 // CHECK3-NEXT: store float* [[__IM]], float** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] 3658 // CHECK3-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 3659 // CHECK3-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0 3660 // CHECK3-NEXT: [[TMP0:%.*]] = load float*, float** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] 3661 // CHECK3-NEXT: [[TMP1:%.*]] = load float, float* [[TMP0]], align 4, !tbaa [[TBAA14]] 3662 // CHECK3-NEXT: store float [[TMP1]], float* [[__RE_]], align 4, !tbaa [[TBAA16]] 3663 // CHECK3-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1 3664 // CHECK3-NEXT: [[TMP2:%.*]] = load float*, float** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] 3665 // CHECK3-NEXT: [[TMP3:%.*]] = load float, float* [[TMP2]], align 4, !tbaa [[TBAA14]] 3666 // CHECK3-NEXT: store float [[TMP3]], float* [[__IM_]], align 4, !tbaa [[TBAA18]] 3667 // CHECK3-NEXT: ret void 3668 // 3669 // 3670 // CHECK3-LABEL: define {{[^@]+}}@_ZNKSt7complexIfE4realEv 3671 // CHECK3-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]]) #[[ATTR4]] comdat align 2 { 3672 // CHECK3-NEXT: entry: 3673 // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 3674 // CHECK3-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] 3675 // CHECK3-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 3676 // CHECK3-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0 3677 // CHECK3-NEXT: [[TMP0:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA16]] 3678 // CHECK3-NEXT: ret float [[TMP0]] 3679 // 3680 // 3681 // CHECK3-LABEL: define {{[^@]+}}@_ZNKSt7complexIfE4imagEv 3682 // CHECK3-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]]) #[[ATTR4]] comdat align 2 { 3683 
// CHECK3-NEXT: entry: 3684 // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 3685 // CHECK3-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] 3686 // CHECK3-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 3687 // CHECK3-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1 3688 // CHECK3-NEXT: [[TMP0:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA18]] 3689 // CHECK3-NEXT: ret float [[TMP0]] 3690 // 3691 // 3692 // CHECK3-LABEL: define {{[^@]+}}@_ZNSt7complexIdEC2ERKdS2_ 3693 // CHECK3-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]], double* nonnull align 8 dereferenceable(8) [[__RE:%.*]], double* nonnull align 8 dereferenceable(8) [[__IM:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 { 3694 // CHECK3-NEXT: entry: 3695 // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 3696 // CHECK3-NEXT: [[__RE_ADDR:%.*]] = alloca double*, align 8 3697 // CHECK3-NEXT: [[__IM_ADDR:%.*]] = alloca double*, align 8 3698 // CHECK3-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] 3699 // CHECK3-NEXT: store double* [[__RE]], double** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] 3700 // CHECK3-NEXT: store double* [[__IM]], double** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] 3701 // CHECK3-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 3702 // CHECK3-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 0 3703 // CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] 3704 // CHECK3-NEXT: [[TMP1:%.*]] = load double, double* [[TMP0]], align 8, !tbaa [[TBAA22]] 3705 // CHECK3-NEXT: store double [[TMP1]], double* [[__RE_]], align 8, !tbaa [[TBAA24]] 3706 // CHECK3-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 1 3707 // CHECK3-NEXT: [[TMP2:%.*]] = load double*, double** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] 3708 // CHECK3-NEXT: [[TMP3:%.*]] = load double, double* [[TMP2]], align 8, !tbaa [[TBAA22]] 3709 // CHECK3-NEXT: store double [[TMP3]], double* [[__IM_]], align 8, !tbaa [[TBAA26]] 3710 // CHECK3-NEXT: ret void 3711 // 3712 // 3713 // CHECK3-LABEL: define {{[^@]+}}@_ZNKSt7complexIdE4realEv 3714 // CHECK3-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR4]] comdat align 2 { 3715 // CHECK3-NEXT: entry: 3716 // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 3717 // CHECK3-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] 3718 // CHECK3-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 3719 // CHECK3-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 0 3720 // CHECK3-NEXT: [[TMP0:%.*]] = load double, double* [[__RE_]], align 8, !tbaa [[TBAA24]] 3721 // CHECK3-NEXT: ret double [[TMP0]] 3722 // 3723 // 3724 // CHECK3-LABEL: define {{[^@]+}}@_ZNKSt7complexIdE4imagEv 3725 // CHECK3-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR4]] comdat align 2 { 3726 // CHECK3-NEXT: entry: 3727 // 
CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 3728 // CHECK3-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] 3729 // CHECK3-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 3730 // CHECK3-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 1 3731 // CHECK3-NEXT: [[TMP0:%.*]] = load double, double* [[__IM_]], align 8, !tbaa [[TBAA26]] 3732 // CHECK3-NEXT: ret double [[TMP0]] 3733 // 3734