1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ 2 // RUN: %clang_cc1 -no-opaque-pointers -no-enable-noundef-analysis -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc 3 // RUN: %clang_cc1 -no-opaque-pointers -no-enable-noundef-analysis -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK1 4 // RUN: %clang_cc1 -no-opaque-pointers -no-enable-noundef-analysis -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc 5 // RUN: %clang_cc1 -no-opaque-pointers -no-enable-noundef-analysis -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK1 6 // RUN: %clang_cc1 -no-opaque-pointers -no-enable-noundef-analysis -x c++ -O1 -disable-llvm-optzns -verify -fopenmp -internal-isystem %S/../Headers/Inputs/include -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -fexceptions -fcxx-exceptions -aux-triple powerpc64le-unknown-unknown -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK1 7 // expected-no-diagnostics 8 #ifndef HEADER 9 #define HEADER 10 11 #include <complex> 12 13 // Verify we do not add tbaa metadata to type punned memory operations: 14 template <typename T> 15 void complex_reduction() { 16 #pragma omp target teams distribute 17 for (int ib = 0; ib < 100; ib++) { 18 std::complex<T> partial_sum; 19 const int istart = ib * 4; 20 const int iend = (ib + 1) * 4; 21 #pragma omp parallel for reduction(+ \ 22 : partial_sum) 23 for (int i = istart; i < iend; i++) 24 partial_sum += std::complex<T>(i, i); 25 } 26 } 27 28 void test() { 29 complex_reduction<float>(); 30 complex_reduction<double>(); 31 } 32 #endif 33 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l16 34 // CHECK1-SAME: () #[[ATTR0:[0-9]+]] { 35 // CHECK1-NEXT: entry: 36 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 37 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 38 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1:[0-9]+]], i8 1, i1 true, i1 true) 39 // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 40 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 41 // CHECK1: user_code.entry: 42 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 43 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 44 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8:![0-9]+]] 45 // CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR6:[0-9]+]] 46 // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 47 // CHECK1-NEXT: ret void 48 // CHECK1: worker.exit: 49 // CHECK1-NEXT: ret void 50 // 51 // 52 // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ 53 // CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1:[0-9]+]] { 54 // CHECK1-NEXT: entry: 55 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 56 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 57 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 58 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 59 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 60 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 61 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 62 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 63 // CHECK1-NEXT: [[IB:%.*]] = alloca i32, align 4 64 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca float, align 4 65 // CHECK1-NEXT: [[REF_TMP2:%.*]] = alloca float, align 4 66 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 67 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12:![0-9]+]] 68 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] 69 // CHECK1-NEXT: [[ISTART:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 4) 70 // CHECK1-NEXT: [[ISTART_ON_STACK:%.*]] = bitcast i8* [[ISTART]] to i32* 71 // CHECK1-NEXT: [[IEND:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 4) 72 // CHECK1-NEXT: [[IEND_ON_STACK:%.*]] = bitcast i8* [[IEND]] to i32* 73 // CHECK1-NEXT: [[PARTIAL_SUM:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 8) 74 // CHECK1-NEXT: [[PARTIAL_SUM_ON_STACK:%.*]] = bitcast i8* [[PARTIAL_SUM]] to %"class.std::complex"* 75 // CHECK1-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* 76 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) #[[ATTR6]] 77 // CHECK1-NEXT: [[TMP1:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* 78 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP1]]) #[[ATTR6]] 79 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 80 // CHECK1-NEXT: [[TMP2:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* 81 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP2]]) #[[ATTR6]] 82 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 83 // CHECK1-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* 84 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR6]] 85 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] 86 // CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* 87 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR6]] 88 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] 89 // CHECK1-NEXT: [[TMP5:%.*]] = bitcast i32* [[IB]] to i8* 90 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP5]]) #[[ATTR6]] 91 // CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 92 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA8]] 93 // CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 94 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 95 // CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 99 96 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 97 // CHECK1: cond.true: 98 // CHECK1-NEXT: br label [[COND_END:%.*]] 99 // CHECK1: cond.false: 100 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 101 // CHECK1-NEXT: br label [[COND_END]] 102 // CHECK1: cond.end: 103 // CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] 104 // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 105 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 106 // CHECK1-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 107 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 108 // CHECK1: omp.inner.for.cond: 109 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 110 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 111 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] 112 // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] 113 // CHECK1: omp.inner.for.cond.cleanup: 114 // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] 115 // CHECK1: omp.inner.for.body: 116 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 117 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 118 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 119 // CHECK1-NEXT: store i32 [[ADD]], i32* [[IB]], align 4, !tbaa [[TBAA8]] 120 // CHECK1-NEXT: [[TMP14:%.*]] = bitcast float* [[REF_TMP]] to i8* 121 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP14]]) #[[ATTR6]] 122 // CHECK1-NEXT: store float 0.000000e+00, float* [[REF_TMP]], align 4, !tbaa [[TBAA14:![0-9]+]] 123 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast float* [[REF_TMP2]] to i8* 124 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP15]]) #[[ATTR6]] 125 // CHECK1-NEXT: store float 0.000000e+00, float* [[REF_TMP2]], align 4, !tbaa [[TBAA14]] 126 // CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM_ON_STACK]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR12:[0-9]+]] 127 // CHECK1-NEXT: [[TMP16:%.*]] = bitcast float* [[REF_TMP2]] to i8* 128 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR6]] 129 // CHECK1-NEXT: [[TMP17:%.*]] = bitcast float* [[REF_TMP]] to i8* 130 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR6]] 131 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] 132 // CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP18]], 4 133 // CHECK1-NEXT: store i32 [[MUL3]], i32* [[ISTART_ON_STACK]], align 4, !tbaa [[TBAA8]] 134 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] 135 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 136 // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[ADD4]], 4 137 // CHECK1-NEXT: store i32 [[MUL5]], i32* [[IEND_ON_STACK]], align 4, !tbaa [[TBAA8]] 138 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 139 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[ISTART_ON_STACK]] to i8* 140 // CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8, !tbaa [[TBAA12]] 141 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 142 // CHECK1-NEXT: [[TMP23:%.*]] = bitcast i32* [[IEND_ON_STACK]] to i8* 143 // CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8, !tbaa [[TBAA12]] 144 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 145 // CHECK1-NEXT: [[TMP25:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM_ON_STACK]] to i8* 146 // CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8, !tbaa [[TBAA12]] 147 // CHECK1-NEXT: [[TMP26:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 148 // CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i32*, %"class.std::complex"*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP26]], i64 3) 149 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 150 // CHECK1: omp.body.continue: 151 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 152 // CHECK1: omp.inner.for.inc: 153 // CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 154 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP27]], 1 155 // CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 156 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] 157 // CHECK1: omp.inner.for.end: 158 // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 159 // CHECK1: omp.loop.exit: 160 // CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]]) 161 // CHECK1-NEXT: [[TMP28:%.*]] = bitcast i32* [[IB]] to i8* 162 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP28]]) #[[ATTR6]] 163 // CHECK1-NEXT: [[TMP29:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* 164 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP29]]) #[[ATTR6]] 165 // CHECK1-NEXT: [[TMP30:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* 166 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP30]]) #[[ATTR6]] 167 // CHECK1-NEXT: [[TMP31:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* 168 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP31]]) #[[ATTR6]] 169 // CHECK1-NEXT: [[TMP32:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* 170 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP32]]) #[[ATTR6]] 171 // CHECK1-NEXT: [[TMP33:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* 172 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR6]] 173 // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[PARTIAL_SUM]], i64 8) 174 // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[IEND]], i64 4) 175 // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[ISTART]], i64 4) 176 // CHECK1-NEXT: ret void 177 // 178 // 179 // CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIfEC1ERKfS2_ 180 // CHECK1-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]], float* nonnull align 4 dereferenceable(4) [[__RE:%.*]], float* nonnull align 4 dereferenceable(4) [[__IM:%.*]]) unnamed_addr #[[ATTR4:[0-9]+]] comdat align 2 { 181 // CHECK1-NEXT: entry: 182 // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 183 // CHECK1-NEXT: [[__RE_ADDR:%.*]] = alloca float*, align 8 184 // CHECK1-NEXT: [[__IM_ADDR:%.*]] = alloca float*, align 8 185 // CHECK1-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] 186 // CHECK1-NEXT: store float* [[__RE]], float** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] 187 // CHECK1-NEXT: store float* [[__IM]], float** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] 188 // CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 189 // CHECK1-NEXT: [[TMP0:%.*]] = load float*, float** [[__RE_ADDR]], align 8 190 // CHECK1-NEXT: [[TMP1:%.*]] = load float*, float** [[__IM_ADDR]], align 8 191 // CHECK1-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR12]] 192 // CHECK1-NEXT: ret void 193 // 194 // 195 // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 196 // CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ISTART:%.*]], i32* nonnull align 4 dereferenceable(4) [[IEND:%.*]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM:%.*]]) #[[ATTR1]] { 197 // CHECK1-NEXT: entry: 198 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 199 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 200 // CHECK1-NEXT: [[ISTART_ADDR:%.*]] = alloca i32*, align 8 201 // CHECK1-NEXT: [[IEND_ADDR:%.*]] = alloca i32*, align 8 202 // CHECK1-NEXT: [[PARTIAL_SUM_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 203 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 204 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 205 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 206 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 207 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 208 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 209 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 210 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 211 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 212 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 213 // CHECK1-NEXT: [[PARTIAL_SUM5:%.*]] = alloca %"class.std::complex", align 4 214 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca float, align 4 215 // CHECK1-NEXT: [[REF_TMP6:%.*]] = alloca float, align 4 216 // CHECK1-NEXT: [[I7:%.*]] = alloca i32, align 4 217 // CHECK1-NEXT: [[REF_TMP14:%.*]] = alloca %"class.std::complex", align 4 218 // CHECK1-NEXT: [[REF_TMP15:%.*]] = alloca float, align 4 219 // CHECK1-NEXT: [[REF_TMP16:%.*]] = alloca float, align 4 220 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 221 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12]] 222 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] 223 // CHECK1-NEXT: store i32* [[ISTART]], i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] 224 // CHECK1-NEXT: store i32* [[IEND]], i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] 225 // CHECK1-NEXT: store %"class.std::complex"* [[PARTIAL_SUM]], %"class.std::complex"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] 226 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] 227 // CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] 228 // CHECK1-NEXT: [[TMP2:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] 229 // CHECK1-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* 230 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR6]] 231 // CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* 232 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR6]] 233 // CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4, !tbaa [[TBAA8]] 234 // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] 235 // CHECK1-NEXT: [[TMP6:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* 236 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR6]] 237 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA8]] 238 // CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] 239 // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* 240 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR6]] 241 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] 242 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] 243 // CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP9]], [[TMP10]] 244 // CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 245 // CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 246 // CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 247 // CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 248 // CHECK1-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] 249 // CHECK1-NEXT: [[TMP11:%.*]] = bitcast i32* [[I]] to i8* 250 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP11]]) #[[ATTR6]] 251 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] 252 // CHECK1-NEXT: store i32 [[TMP12]], i32* [[I]], align 4, !tbaa [[TBAA8]] 253 // CHECK1-NEXT: [[TMP13:%.*]] = bitcast i32* [[I]] to i8* 254 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP13]]) #[[ATTR6]] 255 // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] 256 // CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] 257 // CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP14]], [[TMP15]] 258 // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 259 // CHECK1: omp.precond.then: 260 // CHECK1-NEXT: [[TMP16:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* 261 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR6]] 262 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 263 // CHECK1-NEXT: [[TMP17:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* 264 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR6]] 265 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] 266 // CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 267 // CHECK1-NEXT: [[TMP19:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* 268 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP19]]) #[[ATTR6]] 269 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] 270 // CHECK1-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* 271 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR6]] 272 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] 273 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM5]] to i8* 274 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP21]]) #[[ATTR6]] 275 // CHECK1-NEXT: [[TMP22:%.*]] = bitcast float* [[REF_TMP]] to i8* 276 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP22]]) #[[ATTR6]] 277 // CHECK1-NEXT: store float 0.000000e+00, float* [[REF_TMP]], align 4, !tbaa [[TBAA14]] 278 // CHECK1-NEXT: [[TMP23:%.*]] = bitcast float* [[REF_TMP6]] to i8* 279 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP23]]) #[[ATTR6]] 280 // CHECK1-NEXT: store float 0.000000e+00, float* [[REF_TMP6]], align 4, !tbaa [[TBAA14]] 281 // CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR12]] 282 // CHECK1-NEXT: [[TMP24:%.*]] = bitcast float* [[REF_TMP6]] to i8* 283 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP24]]) #[[ATTR6]] 284 // CHECK1-NEXT: [[TMP25:%.*]] = bitcast float* [[REF_TMP]] to i8* 285 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP25]]) #[[ATTR6]] 286 // CHECK1-NEXT: [[TMP26:%.*]] = bitcast i32* [[I7]] to i8* 287 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP26]]) #[[ATTR6]] 288 // CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 289 // CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4, !tbaa [[TBAA8]] 290 // CHECK1-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP28]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 291 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] 292 // CHECK1: omp.dispatch.cond: 293 // CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 294 // CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] 295 // CHECK1-NEXT: [[CMP8:%.*]] = icmp ugt i32 [[TMP29]], [[TMP30]] 296 // CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 297 // CHECK1: cond.true: 298 // CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] 299 // CHECK1-NEXT: br label [[COND_END:%.*]] 300 // CHECK1: cond.false: 301 // CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 302 // CHECK1-NEXT: br label [[COND_END]] 303 // CHECK1: cond.end: 304 // CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE]] ], [ [[TMP32]], [[COND_FALSE]] ] 305 // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 306 // CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 307 // CHECK1-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 308 // CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 309 // CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 310 // CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP35]], 1 311 // CHECK1-NEXT: [[CMP10:%.*]] = icmp ult i32 [[TMP34]], [[ADD9]] 312 // CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] 313 // CHECK1: omp.dispatch.cleanup: 314 // CHECK1-NEXT: br label [[OMP_DISPATCH_END:%.*]] 315 // CHECK1: omp.dispatch.body: 316 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 317 // CHECK1: omp.inner.for.cond: 318 // CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 319 // CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 320 // CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP37]], 1 321 // CHECK1-NEXT: [[CMP12:%.*]] = icmp ult i32 [[TMP36]], [[ADD11]] 322 // CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] 323 // CHECK1: omp.inner.for.cond.cleanup: 324 // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] 325 // CHECK1: omp.inner.for.body: 326 // CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] 327 // CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 328 // CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP39]], 1 329 // CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP38]], [[MUL]] 330 // CHECK1-NEXT: store i32 [[ADD13]], i32* [[I7]], align 4, !tbaa [[TBAA8]] 331 // CHECK1-NEXT: [[TMP40:%.*]] = bitcast %"class.std::complex"* [[REF_TMP14]] to i8* 332 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP40]]) #[[ATTR6]] 333 // CHECK1-NEXT: [[TMP41:%.*]] = bitcast float* [[REF_TMP15]] to i8* 334 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP41]]) #[[ATTR6]] 335 // CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] 336 // CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP42]] to float 337 // CHECK1-NEXT: store float [[CONV]], float* [[REF_TMP15]], align 4, !tbaa [[TBAA14]] 338 // CHECK1-NEXT: [[TMP43:%.*]] = bitcast float* [[REF_TMP16]] to i8* 339 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP43]]) #[[ATTR6]] 340 // CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] 341 // CHECK1-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP44]] to float 342 // CHECK1-NEXT: store float [[CONV17]], float* [[REF_TMP16]], align 4, !tbaa [[TBAA14]] 343 // CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]], float* nonnull align 4 dereferenceable(4) [[REF_TMP15]], float* nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR12]] 344 // CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR12]] 345 // CHECK1-NEXT: [[TMP45:%.*]] = bitcast float* [[REF_TMP16]] to i8* 346 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP45]]) #[[ATTR6]] 347 // CHECK1-NEXT: [[TMP46:%.*]] = bitcast float* [[REF_TMP15]] to i8* 348 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP46]]) #[[ATTR6]] 349 // CHECK1-NEXT: [[TMP47:%.*]] = bitcast %"class.std::complex"* [[REF_TMP14]] to i8* 350 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP47]]) #[[ATTR6]] 351 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 352 // CHECK1: omp.body.continue: 353 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 354 // CHECK1: omp.inner.for.inc: 355 // CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 356 // CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP48]], 1 357 // CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 358 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] 359 // CHECK1: omp.inner.for.end: 360 // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] 361 // CHECK1: omp.dispatch.inc: 362 // CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 363 // CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] 364 // CHECK1-NEXT: [[ADD19:%.*]] = add i32 [[TMP49]], [[TMP50]] 365 // CHECK1-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 366 // CHECK1-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 367 // CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] 368 // CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP51]], [[TMP52]] 369 // CHECK1-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 370 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] 371 // CHECK1: omp.dispatch.end: 372 // CHECK1-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 373 // CHECK1-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4, !tbaa [[TBAA8]] 374 // CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP54]]) 375 // CHECK1-NEXT: [[TMP55:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 376 // CHECK1-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4, !tbaa [[TBAA8]] 377 // CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 378 // CHECK1-NEXT: [[TMP58:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM5]] to i8* 379 // CHECK1-NEXT: store i8* [[TMP58]], i8** [[TMP57]], align 8 380 // CHECK1-NEXT: [[TMP59:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* 381 // CHECK1-NEXT: [[TMP60:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP56]], i32 1, i64 8, i8* [[TMP59]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func) 382 // CHECK1-NEXT: [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1 383 // CHECK1-NEXT: br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] 384 // CHECK1: .omp.reduction.then: 385 // CHECK1-NEXT: [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR12]] 386 // CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]]) 387 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] 388 // CHECK1: .omp.reduction.done: 389 // CHECK1-NEXT: [[TMP62:%.*]] = bitcast i32* [[I7]] to i8* 390 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR6]] 391 // CHECK1-NEXT: [[TMP63:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM5]] to i8* 392 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP63]]) #[[ATTR6]] 393 // CHECK1-NEXT: [[TMP64:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* 394 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR6]] 395 // CHECK1-NEXT: [[TMP65:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* 396 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR6]] 397 // CHECK1-NEXT: [[TMP66:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* 398 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR6]] 399 // CHECK1-NEXT: [[TMP67:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* 400 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR6]] 401 // CHECK1-NEXT: br label [[OMP_PRECOND_END]] 402 // CHECK1: omp.precond.end: 403 // CHECK1-NEXT: [[TMP68:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* 404 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR6]] 405 // CHECK1-NEXT: [[TMP69:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* 406 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR6]] 407 // CHECK1-NEXT: [[TMP70:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* 408 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR6]] 409 // CHECK1-NEXT: [[TMP71:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* 410 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR6]] 411 // CHECK1-NEXT: ret void 412 // 413 // 414 // CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIfEpLIfEERS0_RKS_IT_E 415 // CHECK1-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[__C:%.*]]) #[[ATTR5:[0-9]+]] comdat align 2 { 416 // CHECK1-NEXT: entry: 417 // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 418 // CHECK1-NEXT: [[__C_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 419 // CHECK1-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] 420 // CHECK1-NEXT: store %"class.std::complex"* [[__C]], %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] 421 // CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 422 // CHECK1-NEXT: [[TMP0:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] 423 // CHECK1-NEXT: [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP0]]) #[[ATTR12]] 424 // CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0 425 // CHECK1-NEXT: [[TMP1:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA16:![0-9]+]] 426 // CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[CALL]] 427 // CHECK1-NEXT: store float [[ADD]], float* [[__RE_]], align 4, !tbaa [[TBAA16]] 428 // CHECK1-NEXT: [[TMP2:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] 429 // CHECK1-NEXT: [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]]) #[[ATTR12]] 430 // CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1 431 // CHECK1-NEXT: [[TMP3:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA18:![0-9]+]] 432 // CHECK1-NEXT: [[ADD3:%.*]] = fadd float [[TMP3]], [[CALL2]] 433 // CHECK1-NEXT: store float [[ADD3]], float* [[__IM_]], align 4, !tbaa [[TBAA18]] 434 // CHECK1-NEXT: ret %"class.std::complex"* [[THIS1]] 435 // 436 // 437 // CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func 438 // CHECK1-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR7:[0-9]+]] { 439 // CHECK1-NEXT: entry: 440 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 441 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 442 // CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 443 // CHECK1-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 444 // CHECK1-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x i8*], align 8 445 // CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca %"class.std::complex", align 4 446 // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] 447 // CHECK1-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2, !tbaa [[TBAA19:![0-9]+]] 448 // CHECK1-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2, !tbaa [[TBAA19]] 449 // CHECK1-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2, !tbaa [[TBAA19]] 450 // CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] 451 // CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]* 452 // CHECK1-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2, !tbaa [[TBAA19]] 453 // CHECK1-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2, !tbaa [[TBAA19]] 454 // CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2, !tbaa [[TBAA19]] 455 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 456 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to %"class.std::complex"** 457 // CHECK1-NEXT: [[TMP11:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[TMP10]], align 8 458 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 459 // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr %"class.std::complex", %"class.std::complex"* [[TMP11]], i64 1 460 // CHECK1-NEXT: [[TMP14:%.*]] = bitcast %"class.std::complex"* [[TMP13]] to i8* 461 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast %"class.std::complex"* [[TMP11]] to i64* 462 // CHECK1-NEXT: [[TMP16:%.*]] = bitcast %"class.std::complex"* [[DOTOMP_REDUCTION_ELEMENT]] to i64* 463 // CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP15]], align 4 464 // CHECK1-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_get_warp_size() 465 // CHECK1-NEXT: [[TMP19:%.*]] = trunc i32 [[TMP18]] to i16 466 // CHECK1-NEXT: [[TMP20:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP19]]) 467 // CHECK1-NEXT: store i64 [[TMP20]], i64* [[TMP16]], align 4 468 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i64, i64* [[TMP15]], i64 1 469 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr i64, i64* [[TMP16]], i64 1 470 // CHECK1-NEXT: [[TMP23:%.*]] = bitcast %"class.std::complex"* [[DOTOMP_REDUCTION_ELEMENT]] to i8* 471 // CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP12]], align 8, !tbaa [[TBAA12]] 472 // CHECK1-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 0 473 // CHECK1-NEXT: [[TMP25:%.*]] = icmp eq i16 [[TMP8]], 1 474 // CHECK1-NEXT: [[TMP26:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] 475 // CHECK1-NEXT: [[TMP27:%.*]] = and i1 [[TMP25]], [[TMP26]] 476 // CHECK1-NEXT: [[TMP28:%.*]] = icmp eq i16 [[TMP8]], 2 477 // CHECK1-NEXT: [[TMP29:%.*]] = and i16 [[TMP6]], 1 478 // CHECK1-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP29]], 0 479 // CHECK1-NEXT: [[TMP31:%.*]] = and i1 [[TMP28]], [[TMP30]] 480 // CHECK1-NEXT: [[TMP32:%.*]] = icmp sgt i16 [[TMP7]], 0 481 // CHECK1-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] 482 // CHECK1-NEXT: [[TMP34:%.*]] = or i1 [[TMP24]], [[TMP27]] 483 // CHECK1-NEXT: [[TMP35:%.*]] = or i1 [[TMP34]], [[TMP33]] 484 // CHECK1-NEXT: br i1 [[TMP35]], label [[THEN:%.*]], label [[ELSE:%.*]] 485 // CHECK1: then: 486 // CHECK1-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* 487 // CHECK1-NEXT: [[TMP37:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* 488 // CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP36]], i8* [[TMP37]]) #[[ATTR6]] 489 // CHECK1-NEXT: br label [[IFCONT:%.*]] 490 // CHECK1: else: 491 // CHECK1-NEXT: br label [[IFCONT]] 492 // CHECK1: ifcont: 493 // CHECK1-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 1 494 // CHECK1-NEXT: [[TMP39:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] 495 // CHECK1-NEXT: [[TMP40:%.*]] = and i1 [[TMP38]], [[TMP39]] 496 // CHECK1-NEXT: br i1 [[TMP40]], label [[THEN4:%.*]], label [[ELSE5:%.*]] 497 // CHECK1: then4: 498 // CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 499 // CHECK1-NEXT: [[TMP42:%.*]] = bitcast i8** [[TMP41]] to %"class.std::complex"** 500 // CHECK1-NEXT: [[TMP43:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[TMP42]], align 8 501 // CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 502 // CHECK1-NEXT: [[TMP45:%.*]] = bitcast i8** [[TMP44]] to %"class.std::complex"** 503 // CHECK1-NEXT: [[TMP46:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[TMP45]], align 8 504 // CHECK1-NEXT: [[TMP47:%.*]] = bitcast %"class.std::complex"* [[TMP46]] to i8* 505 // CHECK1-NEXT: [[TMP48:%.*]] = bitcast %"class.std::complex"* [[TMP43]] to i8* 506 // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP47]], i8* align 4 [[TMP48]], i64 8, i1 false), !tbaa.struct !21 507 // CHECK1-NEXT: br label [[IFCONT6:%.*]] 508 // CHECK1: else5: 509 // CHECK1-NEXT: br label [[IFCONT6]] 510 // CHECK1: ifcont6: 511 // CHECK1-NEXT: ret void 512 // 513 // 514 // CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func 515 // CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR7]] { 516 // CHECK1-NEXT: entry: 517 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 518 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 519 // CHECK1-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4 520 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 521 // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] 522 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] 523 // CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 524 // CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 525 // CHECK1-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31 526 // CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 527 // CHECK1-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5 528 // CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8 529 // CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to [1 x i8*]* 530 // CHECK1-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] 531 // CHECK1-NEXT: br label [[PRECOND:%.*]] 532 // CHECK1: precond: 533 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] 534 // CHECK1-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 2 535 // CHECK1-NEXT: br i1 [[TMP9]], label [[BODY:%.*]], label [[EXIT:%.*]] 536 // CHECK1: body: 537 // CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP2]]) 538 // CHECK1-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 539 // CHECK1-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] 540 // CHECK1: then: 541 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i64 0, i64 0 542 // CHECK1-NEXT: [[TMP11:%.*]] = load i8*, i8** [[TMP10]], align 8, !tbaa [[TBAA12]] 543 // CHECK1-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP11]] to i32* 544 // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 [[TMP8]] 545 // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] 546 // CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]], align 4 547 // CHECK1-NEXT: store volatile i32 [[TMP15]], i32 addrspace(3)* [[TMP14]], align 4 548 // CHECK1-NEXT: br label [[IFCONT:%.*]] 549 // CHECK1: else: 550 // CHECK1-NEXT: br label [[IFCONT]] 551 // CHECK1: ifcont: 552 // CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) 553 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] 554 // CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP16]] 555 // CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] 556 // CHECK1: then2: 557 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] 558 // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i64 0, i64 0 559 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8, !tbaa [[TBAA12]] 560 // CHECK1-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to i32* 561 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[TMP20]], i32 [[TMP8]] 562 // CHECK1-NEXT: [[TMP22:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP17]], align 4, !tbaa [[TBAA8]] 563 // CHECK1-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 4, !tbaa [[TBAA8]] 564 // CHECK1-NEXT: br label [[IFCONT4:%.*]] 565 // CHECK1: else3: 566 // CHECK1-NEXT: br label [[IFCONT4]] 567 // CHECK1: ifcont4: 568 // CHECK1-NEXT: [[TMP23:%.*]] = add nsw i32 [[TMP8]], 1 569 // CHECK1-NEXT: store i32 [[TMP23]], i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] 570 // CHECK1-NEXT: br label [[PRECOND]] 571 // CHECK1: exit: 572 // CHECK1-NEXT: ret void 573 // 574 // 575 // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper 576 // CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR7]] { 577 // CHECK1-NEXT: entry: 578 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 579 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 580 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 581 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 582 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA19]] 583 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] 584 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 585 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 586 // CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 587 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 588 // CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** 589 // CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8, !tbaa [[TBAA12]] 590 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 1 591 // CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32** 592 // CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8, !tbaa [[TBAA12]] 593 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 2 594 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to %"class.std::complex"** 595 // CHECK1-NEXT: [[TMP11:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[TMP10]], align 8, !tbaa [[TBAA12]] 596 // CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]], %"class.std::complex"* [[TMP11]]) #[[ATTR6]] 597 // CHECK1-NEXT: ret void 598 // 599 // 600 // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l16 601 // CHECK1-SAME: () #[[ATTR0]] { 602 // CHECK1-NEXT: entry: 603 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 604 // CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 605 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 true, i1 true) 606 // CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 607 // CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] 608 // CHECK1: user_code.entry: 609 // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 610 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 611 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8]] 612 // CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR6]] 613 // CHECK1-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) 614 // CHECK1-NEXT: ret void 615 // CHECK1: worker.exit: 616 // CHECK1-NEXT: ret void 617 // 618 // 619 // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2 620 // CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { 621 // CHECK1-NEXT: entry: 622 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 623 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 624 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 625 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 626 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 627 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 628 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 629 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 630 // CHECK1-NEXT: [[IB:%.*]] = alloca i32, align 4 631 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca double, align 8 632 // CHECK1-NEXT: [[REF_TMP2:%.*]] = alloca double, align 8 633 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 634 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12]] 635 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] 636 // CHECK1-NEXT: [[ISTART:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 4) 637 // CHECK1-NEXT: [[ISTART_ON_STACK:%.*]] = bitcast i8* [[ISTART]] to i32* 638 // CHECK1-NEXT: [[IEND:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 4) 639 // CHECK1-NEXT: [[IEND_ON_STACK:%.*]] = bitcast i8* [[IEND]] to i32* 640 // CHECK1-NEXT: [[PARTIAL_SUM:%.*]] = call align 16 i8* @__kmpc_alloc_shared(i64 16) 641 // CHECK1-NEXT: [[PARTIAL_SUM_ON_STACK:%.*]] = bitcast i8* [[PARTIAL_SUM]] to %"class.std::complex.0"* 642 // CHECK1-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* 643 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) #[[ATTR6]] 644 // CHECK1-NEXT: [[TMP1:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* 645 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP1]]) #[[ATTR6]] 646 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 647 // CHECK1-NEXT: [[TMP2:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* 648 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP2]]) #[[ATTR6]] 649 // CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 650 // CHECK1-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* 651 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR6]] 652 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] 653 // CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* 654 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR6]] 655 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] 656 // CHECK1-NEXT: [[TMP5:%.*]] = bitcast i32* [[IB]] to i8* 657 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP5]]) #[[ATTR6]] 658 // CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 659 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA8]] 660 // CHECK1-NEXT: call void @__kmpc_distribute_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 661 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 662 // CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 99 663 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 664 // CHECK1: cond.true: 665 // CHECK1-NEXT: br label [[COND_END:%.*]] 666 // CHECK1: cond.false: 667 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 668 // CHECK1-NEXT: br label [[COND_END]] 669 // CHECK1: cond.end: 670 // CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] 671 // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 672 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 673 // CHECK1-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 674 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 675 // CHECK1: omp.inner.for.cond: 676 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 677 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 678 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] 679 // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] 680 // CHECK1: omp.inner.for.cond.cleanup: 681 // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] 682 // CHECK1: omp.inner.for.body: 683 // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 684 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 685 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 686 // CHECK1-NEXT: store i32 [[ADD]], i32* [[IB]], align 4, !tbaa [[TBAA8]] 687 // CHECK1-NEXT: [[TMP14:%.*]] = bitcast double* [[REF_TMP]] to i8* 688 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP14]]) #[[ATTR6]] 689 // CHECK1-NEXT: store double 0.000000e+00, double* [[REF_TMP]], align 8, !tbaa [[TBAA22:![0-9]+]] 690 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast double* [[REF_TMP2]] to i8* 691 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP15]]) #[[ATTR6]] 692 // CHECK1-NEXT: store double 0.000000e+00, double* [[REF_TMP2]], align 8, !tbaa [[TBAA22]] 693 // CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM_ON_STACK]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR12]] 694 // CHECK1-NEXT: [[TMP16:%.*]] = bitcast double* [[REF_TMP2]] to i8* 695 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP16]]) #[[ATTR6]] 696 // CHECK1-NEXT: [[TMP17:%.*]] = bitcast double* [[REF_TMP]] to i8* 697 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP17]]) #[[ATTR6]] 698 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] 699 // CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP18]], 4 700 // CHECK1-NEXT: store i32 [[MUL3]], i32* [[ISTART_ON_STACK]], align 4, !tbaa [[TBAA8]] 701 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] 702 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 703 // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[ADD4]], 4 704 // CHECK1-NEXT: store i32 [[MUL5]], i32* [[IEND_ON_STACK]], align 4, !tbaa [[TBAA8]] 705 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 706 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[ISTART_ON_STACK]] to i8* 707 // CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8, !tbaa [[TBAA12]] 708 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 709 // CHECK1-NEXT: [[TMP23:%.*]] = bitcast i32* [[IEND_ON_STACK]] to i8* 710 // CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8, !tbaa [[TBAA12]] 711 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 712 // CHECK1-NEXT: [[TMP25:%.*]] = bitcast %"class.std::complex.0"* [[PARTIAL_SUM_ON_STACK]] to i8* 713 // CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8, !tbaa [[TBAA12]] 714 // CHECK1-NEXT: [[TMP26:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** 715 // CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i32*, %"class.std::complex.0"*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP26]], i64 3) 716 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 717 // CHECK1: omp.body.continue: 718 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 719 // CHECK1: omp.inner.for.inc: 720 // CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 721 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP27]], 1 722 // CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 723 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] 724 // CHECK1: omp.inner.for.end: 725 // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] 726 // CHECK1: omp.loop.exit: 727 // CHECK1-NEXT: call void @__kmpc_distribute_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP7]]) 728 // CHECK1-NEXT: [[TMP28:%.*]] = bitcast i32* [[IB]] to i8* 729 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP28]]) #[[ATTR6]] 730 // CHECK1-NEXT: [[TMP29:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* 731 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP29]]) #[[ATTR6]] 732 // CHECK1-NEXT: [[TMP30:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* 733 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP30]]) #[[ATTR6]] 734 // CHECK1-NEXT: [[TMP31:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* 735 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP31]]) #[[ATTR6]] 736 // CHECK1-NEXT: [[TMP32:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* 737 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP32]]) #[[ATTR6]] 738 // CHECK1-NEXT: [[TMP33:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* 739 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR6]] 740 // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[PARTIAL_SUM]], i64 16) 741 // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[IEND]], i64 4) 742 // CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[ISTART]], i64 4) 743 // CHECK1-NEXT: ret void 744 // 745 // 746 // CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIdEC1ERKdS2_ 747 // CHECK1-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]], double* nonnull align 8 dereferenceable(8) [[__RE:%.*]], double* nonnull align 8 dereferenceable(8) [[__IM:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { 748 // CHECK1-NEXT: entry: 749 // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 750 // CHECK1-NEXT: [[__RE_ADDR:%.*]] = alloca double*, align 8 751 // CHECK1-NEXT: [[__IM_ADDR:%.*]] = alloca double*, align 8 752 // CHECK1-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] 753 // CHECK1-NEXT: store double* [[__RE]], double** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] 754 // CHECK1-NEXT: store double* [[__IM]], double** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] 755 // CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 756 // CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[__RE_ADDR]], align 8 757 // CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[__IM_ADDR]], align 8 758 // CHECK1-NEXT: call void @_ZNSt7complexIdEC2ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS1]], double* nonnull align 8 dereferenceable(8) [[TMP0]], double* nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR12]] 759 // CHECK1-NEXT: ret void 760 // 761 // 762 // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3 763 // CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ISTART:%.*]], i32* nonnull align 4 dereferenceable(4) [[IEND:%.*]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM:%.*]]) #[[ATTR1]] { 764 // CHECK1-NEXT: entry: 765 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 766 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 767 // CHECK1-NEXT: [[ISTART_ADDR:%.*]] = alloca i32*, align 8 768 // CHECK1-NEXT: [[IEND_ADDR:%.*]] = alloca i32*, align 8 769 // CHECK1-NEXT: [[PARTIAL_SUM_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 770 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 771 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 772 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 773 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 774 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 775 // CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 776 // CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 777 // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 778 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 779 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 780 // CHECK1-NEXT: [[PARTIAL_SUM5:%.*]] = alloca %"class.std::complex.0", align 8 781 // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca double, align 8 782 // CHECK1-NEXT: [[REF_TMP6:%.*]] = alloca double, align 8 783 // CHECK1-NEXT: [[I7:%.*]] = alloca i32, align 4 784 // CHECK1-NEXT: [[REF_TMP14:%.*]] = alloca %"class.std::complex.0", align 8 785 // CHECK1-NEXT: [[REF_TMP15:%.*]] = alloca double, align 8 786 // CHECK1-NEXT: [[REF_TMP16:%.*]] = alloca double, align 8 787 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 788 // CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12]] 789 // CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] 790 // CHECK1-NEXT: store i32* [[ISTART]], i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] 791 // CHECK1-NEXT: store i32* [[IEND]], i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] 792 // CHECK1-NEXT: store %"class.std::complex.0"* [[PARTIAL_SUM]], %"class.std::complex.0"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] 793 // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] 794 // CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] 795 // CHECK1-NEXT: [[TMP2:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] 796 // CHECK1-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* 797 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR6]] 798 // CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* 799 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR6]] 800 // CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4, !tbaa [[TBAA8]] 801 // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] 802 // CHECK1-NEXT: [[TMP6:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* 803 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR6]] 804 // CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA8]] 805 // CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] 806 // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* 807 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR6]] 808 // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] 809 // CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] 810 // CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP9]], [[TMP10]] 811 // CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 812 // CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 813 // CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 814 // CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 815 // CHECK1-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] 816 // CHECK1-NEXT: [[TMP11:%.*]] = bitcast i32* [[I]] to i8* 817 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP11]]) #[[ATTR6]] 818 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] 819 // CHECK1-NEXT: store i32 [[TMP12]], i32* [[I]], align 4, !tbaa [[TBAA8]] 820 // CHECK1-NEXT: [[TMP13:%.*]] = bitcast i32* [[I]] to i8* 821 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP13]]) #[[ATTR6]] 822 // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] 823 // CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] 824 // CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP14]], [[TMP15]] 825 // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 826 // CHECK1: omp.precond.then: 827 // CHECK1-NEXT: [[TMP16:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* 828 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR6]] 829 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 830 // CHECK1-NEXT: [[TMP17:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* 831 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR6]] 832 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] 833 // CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 834 // CHECK1-NEXT: [[TMP19:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* 835 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP19]]) #[[ATTR6]] 836 // CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] 837 // CHECK1-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* 838 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR6]] 839 // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] 840 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast %"class.std::complex.0"* [[PARTIAL_SUM5]] to i8* 841 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP21]]) #[[ATTR6]] 842 // CHECK1-NEXT: [[TMP22:%.*]] = bitcast double* [[REF_TMP]] to i8* 843 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP22]]) #[[ATTR6]] 844 // CHECK1-NEXT: store double 0.000000e+00, double* [[REF_TMP]], align 8, !tbaa [[TBAA22]] 845 // CHECK1-NEXT: [[TMP23:%.*]] = bitcast double* [[REF_TMP6]] to i8* 846 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP23]]) #[[ATTR6]] 847 // CHECK1-NEXT: store double 0.000000e+00, double* [[REF_TMP6]], align 8, !tbaa [[TBAA22]] 848 // CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR12]] 849 // CHECK1-NEXT: [[TMP24:%.*]] = bitcast double* [[REF_TMP6]] to i8* 850 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP24]]) #[[ATTR6]] 851 // CHECK1-NEXT: [[TMP25:%.*]] = bitcast double* [[REF_TMP]] to i8* 852 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP25]]) #[[ATTR6]] 853 // CHECK1-NEXT: [[TMP26:%.*]] = bitcast i32* [[I7]] to i8* 854 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP26]]) #[[ATTR6]] 855 // CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 856 // CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4, !tbaa [[TBAA8]] 857 // CHECK1-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB3]], i32 [[TMP28]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) 858 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] 859 // CHECK1: omp.dispatch.cond: 860 // CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 861 // CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] 862 // CHECK1-NEXT: [[CMP8:%.*]] = icmp ugt i32 [[TMP29]], [[TMP30]] 863 // CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] 864 // CHECK1: cond.true: 865 // CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] 866 // CHECK1-NEXT: br label [[COND_END:%.*]] 867 // CHECK1: cond.false: 868 // CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 869 // CHECK1-NEXT: br label [[COND_END]] 870 // CHECK1: cond.end: 871 // CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE]] ], [ [[TMP32]], [[COND_FALSE]] ] 872 // CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 873 // CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 874 // CHECK1-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 875 // CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 876 // CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 877 // CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP35]], 1 878 // CHECK1-NEXT: [[CMP10:%.*]] = icmp ult i32 [[TMP34]], [[ADD9]] 879 // CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] 880 // CHECK1: omp.dispatch.cleanup: 881 // CHECK1-NEXT: br label [[OMP_DISPATCH_END:%.*]] 882 // CHECK1: omp.dispatch.body: 883 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 884 // CHECK1: omp.inner.for.cond: 885 // CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 886 // CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 887 // CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP37]], 1 888 // CHECK1-NEXT: [[CMP12:%.*]] = icmp ult i32 [[TMP36]], [[ADD11]] 889 // CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] 890 // CHECK1: omp.inner.for.cond.cleanup: 891 // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] 892 // CHECK1: omp.inner.for.body: 893 // CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] 894 // CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 895 // CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP39]], 1 896 // CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP38]], [[MUL]] 897 // CHECK1-NEXT: store i32 [[ADD13]], i32* [[I7]], align 4, !tbaa [[TBAA8]] 898 // CHECK1-NEXT: [[TMP40:%.*]] = bitcast %"class.std::complex.0"* [[REF_TMP14]] to i8* 899 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP40]]) #[[ATTR6]] 900 // CHECK1-NEXT: [[TMP41:%.*]] = bitcast double* [[REF_TMP15]] to i8* 901 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP41]]) #[[ATTR6]] 902 // CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] 903 // CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP42]] to double 904 // CHECK1-NEXT: store double [[CONV]], double* [[REF_TMP15]], align 8, !tbaa [[TBAA22]] 905 // CHECK1-NEXT: [[TMP43:%.*]] = bitcast double* [[REF_TMP16]] to i8* 906 // CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP43]]) #[[ATTR6]] 907 // CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] 908 // CHECK1-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP44]] to double 909 // CHECK1-NEXT: store double [[CONV17]], double* [[REF_TMP16]], align 8, !tbaa [[TBAA22]] 910 // CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]], double* nonnull align 8 dereferenceable(8) [[REF_TMP15]], double* nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR12]] 911 // CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR12]] 912 // CHECK1-NEXT: [[TMP45:%.*]] = bitcast double* [[REF_TMP16]] to i8* 913 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP45]]) #[[ATTR6]] 914 // CHECK1-NEXT: [[TMP46:%.*]] = bitcast double* [[REF_TMP15]] to i8* 915 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP46]]) #[[ATTR6]] 916 // CHECK1-NEXT: [[TMP47:%.*]] = bitcast %"class.std::complex.0"* [[REF_TMP14]] to i8* 917 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP47]]) #[[ATTR6]] 918 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 919 // CHECK1: omp.body.continue: 920 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] 921 // CHECK1: omp.inner.for.inc: 922 // CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 923 // CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP48]], 1 924 // CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] 925 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] 926 // CHECK1: omp.inner.for.end: 927 // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] 928 // CHECK1: omp.dispatch.inc: 929 // CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 930 // CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] 931 // CHECK1-NEXT: [[ADD19:%.*]] = add i32 [[TMP49]], [[TMP50]] 932 // CHECK1-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] 933 // CHECK1-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 934 // CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] 935 // CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP51]], [[TMP52]] 936 // CHECK1-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] 937 // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] 938 // CHECK1: omp.dispatch.end: 939 // CHECK1-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 940 // CHECK1-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4, !tbaa [[TBAA8]] 941 // CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3]], i32 [[TMP54]]) 942 // CHECK1-NEXT: [[TMP55:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 943 // CHECK1-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4, !tbaa [[TBAA8]] 944 // CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 945 // CHECK1-NEXT: [[TMP58:%.*]] = bitcast %"class.std::complex.0"* [[PARTIAL_SUM5]] to i8* 946 // CHECK1-NEXT: store i8* [[TMP58]], i8** [[TMP57]], align 8 947 // CHECK1-NEXT: [[TMP59:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* 948 // CHECK1-NEXT: [[TMP60:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP56]], i32 1, i64 8, i8* [[TMP59]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func5, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func6) 949 // CHECK1-NEXT: [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1 950 // CHECK1-NEXT: br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] 951 // CHECK1: .omp.reduction.then: 952 // CHECK1-NEXT: [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR12]] 953 // CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]]) 954 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] 955 // CHECK1: .omp.reduction.done: 956 // CHECK1-NEXT: [[TMP62:%.*]] = bitcast i32* [[I7]] to i8* 957 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR6]] 958 // CHECK1-NEXT: [[TMP63:%.*]] = bitcast %"class.std::complex.0"* [[PARTIAL_SUM5]] to i8* 959 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP63]]) #[[ATTR6]] 960 // CHECK1-NEXT: [[TMP64:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* 961 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR6]] 962 // CHECK1-NEXT: [[TMP65:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* 963 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR6]] 964 // CHECK1-NEXT: [[TMP66:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* 965 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR6]] 966 // CHECK1-NEXT: [[TMP67:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* 967 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR6]] 968 // CHECK1-NEXT: br label [[OMP_PRECOND_END]] 969 // CHECK1: omp.precond.end: 970 // CHECK1-NEXT: [[TMP68:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* 971 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR6]] 972 // CHECK1-NEXT: [[TMP69:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* 973 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR6]] 974 // CHECK1-NEXT: [[TMP70:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* 975 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR6]] 976 // CHECK1-NEXT: [[TMP71:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* 977 // CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR6]] 978 // CHECK1-NEXT: ret void 979 // 980 // 981 // CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIdEpLIdEERS0_RKS_IT_E 982 // CHECK1-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[__C:%.*]]) #[[ATTR5]] comdat align 2 { 983 // CHECK1-NEXT: entry: 984 // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 985 // CHECK1-NEXT: [[__C_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 986 // CHECK1-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] 987 // CHECK1-NEXT: store %"class.std::complex.0"* [[__C]], %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] 988 // CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 989 // CHECK1-NEXT: [[TMP0:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] 990 // CHECK1-NEXT: [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP0]]) #[[ATTR12]] 991 // CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 0 992 // CHECK1-NEXT: [[TMP1:%.*]] = load double, double* [[__RE_]], align 8, !tbaa [[TBAA24:![0-9]+]] 993 // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[CALL]] 994 // CHECK1-NEXT: store double [[ADD]], double* [[__RE_]], align 8, !tbaa [[TBAA24]] 995 // CHECK1-NEXT: [[TMP2:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] 996 // CHECK1-NEXT: [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]]) #[[ATTR12]] 997 // CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 1 998 // CHECK1-NEXT: [[TMP3:%.*]] = load double, double* [[__IM_]], align 8, !tbaa [[TBAA26:![0-9]+]] 999 // CHECK1-NEXT: [[ADD3:%.*]] = fadd double [[TMP3]], [[CALL2]] 1000 // CHECK1-NEXT: store double [[ADD3]], double* [[__IM_]], align 8, !tbaa [[TBAA26]] 1001 // CHECK1-NEXT: ret %"class.std::complex.0"* [[THIS1]] 1002 // 1003 // 1004 // CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func5 1005 // CHECK1-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR7]] { 1006 // CHECK1-NEXT: entry: 1007 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 1008 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 1009 // CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 1010 // CHECK1-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 1011 // CHECK1-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x i8*], align 8 1012 // CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca %"class.std::complex.0", align 8 1013 // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] 1014 // CHECK1-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2, !tbaa [[TBAA19]] 1015 // CHECK1-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2, !tbaa [[TBAA19]] 1016 // CHECK1-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2, !tbaa [[TBAA19]] 1017 // CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] 1018 // CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]* 1019 // CHECK1-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2, !tbaa [[TBAA19]] 1020 // CHECK1-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2, !tbaa [[TBAA19]] 1021 // CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2, !tbaa [[TBAA19]] 1022 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 1023 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to %"class.std::complex.0"** 1024 // CHECK1-NEXT: [[TMP11:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[TMP10]], align 8 1025 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 1026 // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr %"class.std::complex.0", %"class.std::complex.0"* [[TMP11]], i64 1 1027 // CHECK1-NEXT: [[TMP14:%.*]] = bitcast %"class.std::complex.0"* [[TMP13]] to i8* 1028 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast %"class.std::complex.0"* [[TMP11]] to i64* 1029 // CHECK1-NEXT: [[TMP16:%.*]] = bitcast %"class.std::complex.0"* [[DOTOMP_REDUCTION_ELEMENT]] to i64* 1030 // CHECK1-NEXT: br label [[DOTSHUFFLE_PRE_COND:%.*]] 1031 // CHECK1: .shuffle.pre_cond: 1032 // CHECK1-NEXT: [[TMP17:%.*]] = phi i64* [ [[TMP15]], [[ENTRY:%.*]] ], [ [[TMP29:%.*]], [[DOTSHUFFLE_THEN:%.*]] ] 1033 // CHECK1-NEXT: [[TMP18:%.*]] = phi i64* [ [[TMP16]], [[ENTRY]] ], [ [[TMP30:%.*]], [[DOTSHUFFLE_THEN]] ] 1034 // CHECK1-NEXT: [[TMP19:%.*]] = bitcast i64* [[TMP17]] to i8* 1035 // CHECK1-NEXT: [[TMP20:%.*]] = ptrtoint i8* [[TMP14]] to i64 1036 // CHECK1-NEXT: [[TMP21:%.*]] = ptrtoint i8* [[TMP19]] to i64 1037 // CHECK1-NEXT: [[TMP22:%.*]] = sub i64 [[TMP20]], [[TMP21]] 1038 // CHECK1-NEXT: [[TMP23:%.*]] = sdiv exact i64 [[TMP22]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) 1039 // CHECK1-NEXT: [[TMP24:%.*]] = icmp sgt i64 [[TMP23]], 7 1040 // CHECK1-NEXT: br i1 [[TMP24]], label [[DOTSHUFFLE_THEN]], label [[DOTSHUFFLE_EXIT:%.*]] 1041 // CHECK1: .shuffle.then: 1042 // CHECK1-NEXT: [[TMP25:%.*]] = load i64, i64* [[TMP17]], align 8 1043 // CHECK1-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_get_warp_size() 1044 // CHECK1-NEXT: [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16 1045 // CHECK1-NEXT: [[TMP28:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP25]], i16 [[TMP7]], i16 [[TMP27]]) 1046 // CHECK1-NEXT: store i64 [[TMP28]], i64* [[TMP18]], align 8 1047 // CHECK1-NEXT: [[TMP29]] = getelementptr i64, i64* [[TMP17]], i64 1 1048 // CHECK1-NEXT: [[TMP30]] = getelementptr i64, i64* [[TMP18]], i64 1 1049 // CHECK1-NEXT: br label [[DOTSHUFFLE_PRE_COND]] 1050 // CHECK1: .shuffle.exit: 1051 // CHECK1-NEXT: [[TMP31:%.*]] = bitcast %"class.std::complex.0"* [[DOTOMP_REDUCTION_ELEMENT]] to i8* 1052 // CHECK1-NEXT: store i8* [[TMP31]], i8** [[TMP12]], align 8, !tbaa [[TBAA12]] 1053 // CHECK1-NEXT: [[TMP32:%.*]] = icmp eq i16 [[TMP8]], 0 1054 // CHECK1-NEXT: [[TMP33:%.*]] = icmp eq i16 [[TMP8]], 1 1055 // CHECK1-NEXT: [[TMP34:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] 1056 // CHECK1-NEXT: [[TMP35:%.*]] = and i1 [[TMP33]], [[TMP34]] 1057 // CHECK1-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 2 1058 // CHECK1-NEXT: [[TMP37:%.*]] = and i16 [[TMP6]], 1 1059 // CHECK1-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP37]], 0 1060 // CHECK1-NEXT: [[TMP39:%.*]] = and i1 [[TMP36]], [[TMP38]] 1061 // CHECK1-NEXT: [[TMP40:%.*]] = icmp sgt i16 [[TMP7]], 0 1062 // CHECK1-NEXT: [[TMP41:%.*]] = and i1 [[TMP39]], [[TMP40]] 1063 // CHECK1-NEXT: [[TMP42:%.*]] = or i1 [[TMP32]], [[TMP35]] 1064 // CHECK1-NEXT: [[TMP43:%.*]] = or i1 [[TMP42]], [[TMP41]] 1065 // CHECK1-NEXT: br i1 [[TMP43]], label [[THEN:%.*]], label [[ELSE:%.*]] 1066 // CHECK1: then: 1067 // CHECK1-NEXT: [[TMP44:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* 1068 // CHECK1-NEXT: [[TMP45:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* 1069 // CHECK1-NEXT: call void @"_omp$reduction$reduction_func4"(i8* [[TMP44]], i8* [[TMP45]]) #[[ATTR6]] 1070 // CHECK1-NEXT: br label [[IFCONT:%.*]] 1071 // CHECK1: else: 1072 // CHECK1-NEXT: br label [[IFCONT]] 1073 // CHECK1: ifcont: 1074 // CHECK1-NEXT: [[TMP46:%.*]] = icmp eq i16 [[TMP8]], 1 1075 // CHECK1-NEXT: [[TMP47:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] 1076 // CHECK1-NEXT: [[TMP48:%.*]] = and i1 [[TMP46]], [[TMP47]] 1077 // CHECK1-NEXT: br i1 [[TMP48]], label [[THEN4:%.*]], label [[ELSE5:%.*]] 1078 // CHECK1: then4: 1079 // CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 1080 // CHECK1-NEXT: [[TMP50:%.*]] = bitcast i8** [[TMP49]] to %"class.std::complex.0"** 1081 // CHECK1-NEXT: [[TMP51:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[TMP50]], align 8 1082 // CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 1083 // CHECK1-NEXT: [[TMP53:%.*]] = bitcast i8** [[TMP52]] to %"class.std::complex.0"** 1084 // CHECK1-NEXT: [[TMP54:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[TMP53]], align 8 1085 // CHECK1-NEXT: [[TMP55:%.*]] = bitcast %"class.std::complex.0"* [[TMP54]] to i8* 1086 // CHECK1-NEXT: [[TMP56:%.*]] = bitcast %"class.std::complex.0"* [[TMP51]] to i8* 1087 // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP55]], i8* align 8 [[TMP56]], i64 16, i1 false), !tbaa.struct !27 1088 // CHECK1-NEXT: br label [[IFCONT6:%.*]] 1089 // CHECK1: else5: 1090 // CHECK1-NEXT: br label [[IFCONT6]] 1091 // CHECK1: ifcont6: 1092 // CHECK1-NEXT: ret void 1093 // 1094 // 1095 // CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func6 1096 // CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR7]] { 1097 // CHECK1-NEXT: entry: 1098 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 1099 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1100 // CHECK1-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4 1101 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) 1102 // CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] 1103 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] 1104 // CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 1105 // CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 1106 // CHECK1-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP4]], 31 1107 // CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() 1108 // CHECK1-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP5]], 5 1109 // CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[DOTADDR]], align 8 1110 // CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to [1 x i8*]* 1111 // CHECK1-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] 1112 // CHECK1-NEXT: br label [[PRECOND:%.*]] 1113 // CHECK1: precond: 1114 // CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] 1115 // CHECK1-NEXT: [[TMP9:%.*]] = icmp ult i32 [[TMP8]], 4 1116 // CHECK1-NEXT: br i1 [[TMP9]], label [[BODY:%.*]], label [[EXIT:%.*]] 1117 // CHECK1: body: 1118 // CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) 1119 // CHECK1-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 1120 // CHECK1-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] 1121 // CHECK1: then: 1122 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i64 0, i64 0 1123 // CHECK1-NEXT: [[TMP11:%.*]] = load i8*, i8** [[TMP10]], align 8, !tbaa [[TBAA12]] 1124 // CHECK1-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP11]] to i32* 1125 // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 [[TMP8]] 1126 // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] 1127 // CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP13]], align 4 1128 // CHECK1-NEXT: store volatile i32 [[TMP15]], i32 addrspace(3)* [[TMP14]], align 4 1129 // CHECK1-NEXT: br label [[IFCONT:%.*]] 1130 // CHECK1: else: 1131 // CHECK1-NEXT: br label [[IFCONT]] 1132 // CHECK1: ifcont: 1133 // CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) 1134 // CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] 1135 // CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP3]], [[TMP16]] 1136 // CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN2:%.*]], label [[ELSE3:%.*]] 1137 // CHECK1: then2: 1138 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP3]] 1139 // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP7]], i64 0, i64 0 1140 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8, !tbaa [[TBAA12]] 1141 // CHECK1-NEXT: [[TMP20:%.*]] = bitcast i8* [[TMP19]] to i32* 1142 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i32, i32* [[TMP20]], i32 [[TMP8]] 1143 // CHECK1-NEXT: [[TMP22:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP17]], align 4, !tbaa [[TBAA8]] 1144 // CHECK1-NEXT: store i32 [[TMP22]], i32* [[TMP21]], align 4, !tbaa [[TBAA8]] 1145 // CHECK1-NEXT: br label [[IFCONT4:%.*]] 1146 // CHECK1: else3: 1147 // CHECK1-NEXT: br label [[IFCONT4]] 1148 // CHECK1: ifcont4: 1149 // CHECK1-NEXT: [[TMP23:%.*]] = add nsw i32 [[TMP8]], 1 1150 // CHECK1-NEXT: store i32 [[TMP23]], i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] 1151 // CHECK1-NEXT: br label [[PRECOND]] 1152 // CHECK1: exit: 1153 // CHECK1-NEXT: ret void 1154 // 1155 // 1156 // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper 1157 // CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR7]] { 1158 // CHECK1-NEXT: entry: 1159 // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 1160 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 1161 // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 1162 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 1163 // CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA19]] 1164 // CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] 1165 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 1166 // CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) 1167 // CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 1168 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 1169 // CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** 1170 // CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8, !tbaa [[TBAA12]] 1171 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 1 1172 // CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32** 1173 // CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8, !tbaa [[TBAA12]] 1174 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 2 1175 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to %"class.std::complex.0"** 1176 // CHECK1-NEXT: [[TMP11:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[TMP10]], align 8, !tbaa [[TBAA12]] 1177 // CHECK1-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]], %"class.std::complex.0"* [[TMP11]]) #[[ATTR6]] 1178 // CHECK1-NEXT: ret void 1179 // 1180 // 1181 // CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIfEC2ERKfS2_ 1182 // CHECK1-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]], float* nonnull align 4 dereferenceable(4) [[__RE:%.*]], float* nonnull align 4 dereferenceable(4) [[__IM:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { 1183 // CHECK1-NEXT: entry: 1184 // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 1185 // CHECK1-NEXT: [[__RE_ADDR:%.*]] = alloca float*, align 8 1186 // CHECK1-NEXT: [[__IM_ADDR:%.*]] = alloca float*, align 8 1187 // CHECK1-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] 1188 // CHECK1-NEXT: store float* [[__RE]], float** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] 1189 // CHECK1-NEXT: store float* [[__IM]], float** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] 1190 // CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 1191 // CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0 1192 // CHECK1-NEXT: [[TMP0:%.*]] = load float*, float** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] 1193 // CHECK1-NEXT: [[TMP1:%.*]] = load float, float* [[TMP0]], align 4, !tbaa [[TBAA14]] 1194 // CHECK1-NEXT: store float [[TMP1]], float* [[__RE_]], align 4, !tbaa [[TBAA16]] 1195 // CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1 1196 // CHECK1-NEXT: [[TMP2:%.*]] = load float*, float** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] 1197 // CHECK1-NEXT: [[TMP3:%.*]] = load float, float* [[TMP2]], align 4, !tbaa [[TBAA14]] 1198 // CHECK1-NEXT: store float [[TMP3]], float* [[__IM_]], align 4, !tbaa [[TBAA18]] 1199 // CHECK1-NEXT: ret void 1200 // 1201 // 1202 // CHECK1-LABEL: define {{[^@]+}}@_ZNKSt7complexIfE4realEv 1203 // CHECK1-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 { 1204 // CHECK1-NEXT: entry: 1205 // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 1206 // CHECK1-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] 1207 // CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 1208 // CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0 1209 // CHECK1-NEXT: [[TMP0:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA16]] 1210 // CHECK1-NEXT: ret float [[TMP0]] 1211 // 1212 // 1213 // CHECK1-LABEL: define {{[^@]+}}@_ZNKSt7complexIfE4imagEv 1214 // CHECK1-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 { 1215 // CHECK1-NEXT: entry: 1216 // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 1217 // CHECK1-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] 1218 // CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 1219 // CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1 1220 // CHECK1-NEXT: [[TMP0:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA18]] 1221 // CHECK1-NEXT: ret float [[TMP0]] 1222 // 1223 // 1224 // CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIdEC2ERKdS2_ 1225 // CHECK1-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]], double* nonnull align 8 dereferenceable(8) [[__RE:%.*]], double* nonnull align 8 dereferenceable(8) [[__IM:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { 1226 // CHECK1-NEXT: entry: 1227 // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 1228 // CHECK1-NEXT: [[__RE_ADDR:%.*]] = alloca double*, align 8 1229 // CHECK1-NEXT: [[__IM_ADDR:%.*]] = alloca double*, align 8 1230 // CHECK1-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] 1231 // CHECK1-NEXT: store double* [[__RE]], double** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] 1232 // CHECK1-NEXT: store double* [[__IM]], double** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] 1233 // CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 1234 // CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 0 1235 // CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] 1236 // CHECK1-NEXT: [[TMP1:%.*]] = load double, double* [[TMP0]], align 8, !tbaa [[TBAA22]] 1237 // CHECK1-NEXT: store double [[TMP1]], double* [[__RE_]], align 8, !tbaa [[TBAA24]] 1238 // CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 1 1239 // CHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] 1240 // CHECK1-NEXT: [[TMP3:%.*]] = load double, double* [[TMP2]], align 8, !tbaa [[TBAA22]] 1241 // CHECK1-NEXT: store double [[TMP3]], double* [[__IM_]], align 8, !tbaa [[TBAA26]] 1242 // CHECK1-NEXT: ret void 1243 // 1244 // 1245 // CHECK1-LABEL: define {{[^@]+}}@_ZNKSt7complexIdE4realEv 1246 // CHECK1-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 { 1247 // CHECK1-NEXT: entry: 1248 // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 1249 // CHECK1-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] 1250 // CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 1251 // CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 0 1252 // CHECK1-NEXT: [[TMP0:%.*]] = load double, double* [[__RE_]], align 8, !tbaa [[TBAA24]] 1253 // CHECK1-NEXT: ret double [[TMP0]] 1254 // 1255 // 1256 // CHECK1-LABEL: define {{[^@]+}}@_ZNKSt7complexIdE4imagEv 1257 // CHECK1-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 { 1258 // CHECK1-NEXT: entry: 1259 // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 1260 // CHECK1-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] 1261 // CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 1262 // CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 1 1263 // CHECK1-NEXT: [[TMP0:%.*]] = load double, double* [[__IM_]], align 8, !tbaa [[TBAA26]] 1264 // CHECK1-NEXT: ret double [[TMP0]] 1265 // 1266