; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-print-ast -disable-output < %s | FileCheck %s -check-prefix=AST
; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-codegen -S -verify-dom-info < %s | FileCheck %s -check-prefix=IR

; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-import-jscop -polly-print-ast -disable-output < %s | FileCheck %s -check-prefix=AST-STRIDE4
; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-import-jscop -polly-codegen -S < %s | FileCheck %s -check-prefix=IR-STRIDE4

; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-codegen -polly-omp-backend=LLVM -polly-scheduling=static -polly-scheduling-chunksize=43 -S -verify-dom-info < %s | FileCheck %s -check-prefix=LIBOMP-IR-STATIC-CHUNKED
; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-codegen -polly-omp-backend=LLVM -polly-scheduling=static -S -verify-dom-info < %s | FileCheck %s -check-prefix=LIBOMP-IR-STATIC
; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-codegen -polly-omp-backend=LLVM -polly-scheduling=dynamic -S -verify-dom-info < %s | FileCheck %s -check-prefix=LIBOMP-IR-DYNAMIC
; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-codegen -polly-omp-backend=LLVM -polly-scheduling=dynamic -polly-scheduling-chunksize=4 -S -verify-dom-info < %s | FileCheck %s -check-prefix=LIBOMP-IR-DYNAMIC-FOUR
; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-import-jscop -polly-codegen -polly-omp-backend=LLVM -S < %s | FileCheck %s -check-prefix=LIBOMP-IR-STRIDE4

; This extensive test case tests the creation of the full set of OpenMP calls
; as well as the subfunction creation using a trivial loop as example.
;
; #define N 1024
; float A[N];
;
; void single_parallel_loop(void) {
;   for (long i = 0; i < N; i++)
;     A[i] = 1;
; }

; AST: #pragma simd
; AST: #pragma omp parallel for
; AST: for (int c0 = 0; c0 <= 1023; c0 += 1)
; AST: Stmt_S(c0);

; AST-STRIDE4: #pragma omp parallel for
; AST-STRIDE4: for (int c0 = 0; c0 <= 1023; c0 += 4)
; AST-STRIDE4: #pragma simd
; AST-STRIDE4: for (int c1 = c0; c1 <= c0 + 3; c1 += 1)
; AST-STRIDE4: Stmt_S(c1);

; IR-LABEL: single_parallel_loop()
; IR-NEXT: entry
; IR-NEXT: %polly.par.userContext = alloca

; IR-LABEL: polly.parallel.for:
; IR-NEXT: %polly.par.userContext1 = bitcast {}* %polly.par.userContext to i8*
; IR-NEXT: call void @GOMP_parallel_loop_runtime_start(void (i8*)* @single_parallel_loop_polly_subfn, i8* %polly.par.userContext1, i32 0, i64 0, i64 1024, i64 1)
; IR-NEXT: call void @single_parallel_loop_polly_subfn(i8* %polly.par.userContext1)
; IR-NEXT: call void @GOMP_parallel_end()
; IR-NEXT: br label %polly.exiting

; IR: define internal void @single_parallel_loop_polly_subfn(i8* %polly.par.userContext) #1
; IR-LABEL: polly.par.setup:
; IR-NEXT: %polly.par.LBPtr = alloca i64
; IR-NEXT: %polly.par.UBPtr = alloca i64
; IR-NEXT: %polly.par.userContext1 =
; IR: br label %polly.par.checkNext

; IR-LABEL: polly.par.exit:
; IR-NEXT: call void @GOMP_loop_end_nowait()
; IR-NEXT: ret void

; IR-LABEL: polly.par.checkNext:
; IR-NEXT: %[[parnext:[._a-zA-Z0-9]*]] = call i8 @GOMP_loop_runtime_next(i64* %polly.par.LBPtr, i64* %polly.par.UBPtr)
; IR-NEXT: %[[cmp:[._a-zA-Z0-9]*]] = icmp ne i8 %[[parnext]], 0
; IR-NEXT: br i1 %[[cmp]], label %polly.par.loadIVBounds, label %polly.par.exit

; IR-LABEL: polly.par.loadIVBounds:
; IR-NEXT: %polly.par.LB = load i64, i64* %polly.par.LBPtr
; IR-NEXT: %polly.par.UB = load i64, i64* %polly.par.UBPtr
; IR-NEXT: %polly.par.UBAdjusted = sub i64 %polly.par.UB, 1
; IR-NEXT: br label %polly.loop_preheader

; IR-LABEL: polly.loop_exit:
; IR-NEXT: br label %polly.par.checkNext

; IR-LABEL: polly.loop_header:
; IR-NEXT: %polly.indvar = phi i64 [ %polly.par.LB, %polly.loop_preheader ], [ %polly.indvar_next, %polly.stmt.S ]
; IR-NEXT: br label %polly.stmt.S

; IR-LABEL: polly.stmt.S:
; IR-NEXT: %[[gep:[._a-zA-Z0-9]*]] = getelementptr [1024 x float], [1024 x float]* {{.*}}, i64 0, i64 %polly.indvar
; IR-NEXT: store float 1.000000e+00, float* %[[gep]]
; IR-NEXT: %polly.indvar_next = add nsw i64 %polly.indvar, 1
; IR-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar_next, %polly.par.UBAdjusted
; IR-NEXT: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit

; IR-LABEL: polly.loop_preheader:
; IR-NEXT: br label %polly.loop_header

; IR: attributes #1 = { "polly.skip.fn" }

; IR-STRIDE4: call void @GOMP_parallel_loop_runtime_start(void (i8*)* @single_parallel_loop_polly_subfn, i8* %polly.par.userContext1, i32 0, i64 0, i64 1024, i64 4)
; IR-STRIDE4: add nsw i64 %polly.indvar, 3
; IR-STRIDE4: %polly.indvar_next = add nsw i64 %polly.indvar, 4
; NOTE(review): the next line has no ':' after its prefix, so FileCheck never
; evaluates it. It is kept unchanged because it is unclear from this file
; whether the generated code still contains this instruction -- confirm against
; actual opt output before either enabling or deleting it.
; IR-STRIDE4 %polly.adjust_ub = sub i64 %polly.par.UBAdjusted, 4

; LIBOMP-IR-STATIC-CHUNKED: %struct.ident_t = type { i32, i32, i32, i32, i8* }

; LIBOMP-IR-STATIC-CHUNKED-LABEL: single_parallel_loop()
; LIBOMP-IR-STATIC-CHUNKED-NEXT: entry
; LIBOMP-IR-STATIC-CHUNKED-NEXT: %polly.par.userContext = alloca

; LIBOMP-IR-STATIC-CHUNKED-LABEL: polly.parallel.for:
; LIBOMP-IR-STATIC-CHUNKED-NEXT: %polly.par.userContext1 = bitcast {}* %polly.par.userContext to i8*
; LIBOMP-IR-STATIC-CHUNKED-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @.loc.dummy, i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i8*)* @single_parallel_loop_polly_subfn to void (i32*, i32*, ...)*), i64 0, i64 1024, i64 1, i8* %polly.par.userContext1)
; LIBOMP-IR-STATIC-CHUNKED-NEXT: br label %polly.exiting

; LIBOMP-IR-STATIC-CHUNKED: define internal void @single_parallel_loop_polly_subfn(i32* %polly.kmpc.global_tid, i32* %polly.kmpc.bound_tid, i64 %polly.kmpc.lb, i64 %polly.kmpc.ub, i64 %polly.kmpc.inc, i8* %polly.kmpc.shared)
; LIBOMP-IR-STATIC-CHUNKED-LABEL: polly.par.setup:
; LIBOMP-IR-STATIC-CHUNKED-NEXT: %polly.par.LBPtr = alloca i64
; LIBOMP-IR-STATIC-CHUNKED-NEXT: %polly.par.UBPtr = alloca i64
; LIBOMP-IR-STATIC-CHUNKED-NEXT: %polly.par.lastIterPtr = alloca i32
; LIBOMP-IR-STATIC-CHUNKED-NEXT: %polly.par.StridePtr = alloca i64
; LIBOMP-IR-STATIC-CHUNKED-NEXT: %polly.par.userContext = bitcast i8* %polly.kmpc.shared
; LIBOMP-IR-STATIC-CHUNKED-NEXT: %polly.par.global_tid = load i32, i32* %polly.kmpc.global_tid
; LIBOMP-IR-STATIC-CHUNKED-NEXT: store i64 %polly.kmpc.lb, i64* %polly.par.LBPtr
; LIBOMP-IR-STATIC-CHUNKED-NEXT: store i64 %polly.kmpc.ub, i64* %polly.par.UBPtr
; LIBOMP-IR-STATIC-CHUNKED-NEXT: store i32 0, i32* %polly.par.lastIterPtr
; LIBOMP-IR-STATIC-CHUNKED-NEXT: store i64 %polly.kmpc.inc, i64* %polly.par.StridePtr
; LIBOMP-IR-STATIC-CHUNKED-NEXT: %polly.indvar.UBAdjusted = add i64 %polly.kmpc.ub, -1
; LIBOMP-IR-STATIC-CHUNKED-NEXT: store i64 %polly.indvar.UBAdjusted, i64* %polly.par.UBPtr, align 8
; LIBOMP-IR-STATIC-CHUNKED-NEXT: call void @__kmpc_for_static_init_{{[48]}}(%struct.ident_t* @.loc.dummy{{[.0-9]*}}, i32 %polly.par.global_tid, i32 33, i32* %polly.par.lastIterPtr, i64* %polly.par.LBPtr, i64* %polly.par.UBPtr, i64* %polly.par.StridePtr, i64 1, i64 43)
; LIBOMP-IR-STATIC-CHUNKED-NEXT: %polly.kmpc.stride = load i64, i64* %polly.par.StridePtr, align 8
; LIBOMP-IR-STATIC-CHUNKED-NEXT: %polly.indvar.LB = load i64, i64* %polly.par.LBPtr
; LIBOMP-IR-STATIC-CHUNKED-NEXT: %polly.indvar.UB.temp = load i64, i64* %polly.par.UBPtr
; LIBOMP-IR-STATIC-CHUNKED-NEXT: %polly.indvar.UB.inRange = icmp sle i64 %polly.indvar.UB.temp, %polly.indvar.UBAdjusted
; LIBOMP-IR-STATIC-CHUNKED-NEXT: %polly.indvar.UB = select i1 %polly.indvar.UB.inRange, i64 %polly.indvar.UB.temp, i64 %polly.indvar.UBAdjusted
; LIBOMP-IR-STATIC-CHUNKED-NEXT: store i64 %polly.indvar.UB, i64* %polly.par.UBPtr, align 8
; LIBOMP-IR-STATIC-CHUNKED-NEXT: %polly.hasIteration = icmp sle i64 %polly.indvar.LB, %polly.indvar.UB
; LIBOMP-IR-STATIC-CHUNKED: br i1 %polly.hasIteration, label %polly.par.loadIVBounds, label %polly.par.exit

; LIBOMP-IR-STATIC-CHUNKED-LABEL: polly.par.exit:
; LIBOMP-IR-STATIC-CHUNKED-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @.loc.dummy, i32 %polly.par.global_tid)
; LIBOMP-IR-STATIC-CHUNKED-NEXT: ret void

; LIBOMP-IR-STATIC-CHUNKED-LABEL: polly.par.checkNext:
; LIBOMP-IR-STATIC-CHUNKED-NEXT: %polly.indvar.nextLB = add i64 %polly.indvar.LB.entry, %polly.kmpc.stride
; LIBOMP-IR-STATIC-CHUNKED-NEXT: %{{[0-9]+}} = add i64 %polly.indvar.UB.entry, %polly.kmpc.stride
; LIBOMP-IR-STATIC-CHUNKED-NEXT: %polly.indvar.nextUB.outOfBounds = icmp sgt i64 %{{[0-9]+}}, %polly.indvar.UBAdjusted
; LIBOMP-IR-STATIC-CHUNKED-NEXT: %polly.indvar.nextUB = select i1 %polly.indvar.nextUB.outOfBounds, i64 %polly.indvar.UBAdjusted, i64 %{{[0-9]+}}
; LIBOMP-IR-STATIC-CHUNKED-NEXT: store i64 %polly.indvar.nextLB, i64* %polly.par.LBPtr, align 8
; LIBOMP-IR-STATIC-CHUNKED-NEXT: store i64 %polly.indvar.nextUB, i64* %polly.par.UBPtr, align 8
; LIBOMP-IR-STATIC-CHUNKED-NEXT: %polly.hasWork = icmp sle i64 %polly.indvar.nextLB, %polly.indvar.UBAdjusted
; LIBOMP-IR-STATIC-CHUNKED-NEXT: br i1 %polly.hasWork, label %polly.par.loadIVBounds, label %polly.par.exit

; LIBOMP-IR-STATIC-CHUNKED-LABEL: polly.par.loadIVBounds:
; LIBOMP-IR-STATIC-CHUNKED-NEXT: %polly.indvar.LB.entry = load i64, i64* %polly.par.LBPtr, align 8
; LIBOMP-IR-STATIC-CHUNKED-NEXT: %polly.indvar.UB.entry = load i64, i64* %polly.par.UBPtr, align 8
; LIBOMP-IR-STATIC-CHUNKED-NEXT: br label %polly.loop_preheader

; LIBOMP-IR-STATIC-CHUNKED-LABEL: polly.loop_exit:
; LIBOMP-IR-STATIC-CHUNKED-NEXT: br label %polly.par.checkNext

; LIBOMP-IR-STATIC-CHUNKED-LABEL: polly.loop_header:
; LIBOMP-IR-STATIC-CHUNKED-NEXT: %polly.indvar = phi i64 [ %polly.indvar.LB.entry, %polly.loop_preheader ], [ %polly.indvar_next, %polly.stmt.S ]
; LIBOMP-IR-STATIC-CHUNKED-NEXT: br label %polly.stmt.S

; LIBOMP-IR-STATIC-CHUNKED-LABEL: polly.stmt.S:
; LIBOMP-IR-STATIC-CHUNKED-NEXT: %[[gep:[._a-zA-Z0-9]*]] = getelementptr [1024 x float], [1024 x float]* {{.*}}, i64 0, i64 %polly.indvar
; LIBOMP-IR-STATIC-CHUNKED-NEXT: store float 1.000000e+00, float* %[[gep]]
; LIBOMP-IR-STATIC-CHUNKED-NEXT: %polly.indvar_next = add nsw i64 %polly.indvar, 1
; LIBOMP-IR-STATIC-CHUNKED-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar_next, %polly.indvar.UB.entry
; LIBOMP-IR-STATIC-CHUNKED-NEXT: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit

; LIBOMP-IR-STATIC-CHUNKED-LABEL: polly.loop_preheader:
; LIBOMP-IR-STATIC-CHUNKED-NEXT: br label %polly.loop_header

; LIBOMP-IR-STATIC-CHUNKED: attributes #1 = { "polly.skip.fn" }

; LIBOMP-IR-STATIC: define internal void @single_parallel_loop_polly_subfn(i32* %polly.kmpc.global_tid, i32* %polly.kmpc.bound_tid, i64 %polly.kmpc.lb, i64 %polly.kmpc.ub, i64 %polly.kmpc.inc, i8* %polly.kmpc.shared)
; LIBOMP-IR-STATIC-LABEL: polly.par.setup:
; LIBOMP-IR-STATIC: call void @__kmpc_for_static_init_{{[48]}}(%struct.ident_t* @.loc.dummy{{[.0-9]*}}, i32 %polly.par.global_tid, i32 34, i32* %polly.par.lastIterPtr, i64* %polly.par.LBPtr, i64* %polly.par.UBPtr, i64* %polly.par.StridePtr, i64 1, i64 1)
; LIBOMP-IR-STATIC: br i1 %polly.hasIteration, label %polly.par.loadIVBounds, label %polly.par.exit

; LIBOMP-IR-STATIC-LABEL: polly.par.exit:
; LIBOMP-IR-STATIC-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @.loc.dummy, i32 %polly.par.global_tid)
; LIBOMP-IR-STATIC-NEXT: ret void

; LIBOMP-IR-STATIC-LABEL: polly.par.checkNext:
; LIBOMP-IR-STATIC-NEXT: br label %polly.par.exit

; LIBOMP-IR-STATIC-LABEL: polly.par.loadIVBounds:
; LIBOMP-IR-STATIC-NEXT: br label %polly.loop_preheader

; LIBOMP-IR-STATIC-LABEL: polly.loop_exit:
; LIBOMP-IR-STATIC-NEXT: br label %polly.par.checkNext

; LIBOMP-IR-STATIC-LABEL: polly.loop_header:
; LIBOMP-IR-STATIC-NEXT: %polly.indvar = phi i64 [ %polly.indvar.LB, %polly.loop_preheader ], [ %polly.indvar_next, %polly.stmt.S ]
; LIBOMP-IR-STATIC-NEXT: br label %polly.stmt.S

; LIBOMP-IR-STATIC-LABEL: polly.stmt.S:
; LIBOMP-IR-STATIC-NEXT: %[[gep:[._a-zA-Z0-9]*]] = getelementptr [1024 x float], [1024 x float]* {{.*}}, i64 0, i64 %polly.indvar
; LIBOMP-IR-STATIC-NEXT: store float 1.000000e+00, float* %[[gep]]
; LIBOMP-IR-STATIC-NEXT: %polly.indvar_next = add nsw i64 %polly.indvar, 1
; LIBOMP-IR-STATIC-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar_next, %polly.indvar.UB
; LIBOMP-IR-STATIC-NEXT: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit

; LIBOMP-IR-STATIC-LABEL: polly.loop_preheader:
; LIBOMP-IR-STATIC-NEXT: br label %polly.loop_header

; LIBOMP-IR-DYNAMIC: call void @__kmpc_dispatch_init_{{[48]}}(%struct.ident_t* @.loc.dummy, i32 %polly.par.global_tid, i32 35, i64 %polly.kmpc.lb, i64 %polly.indvar.UBAdjusted, i64 %polly.kmpc.inc, i64 1)
; LIBOMP-IR-DYNAMIC-NEXT: %{{[0-9]+}} = call i32 @__kmpc_dispatch_next_{{[48]}}(%struct.ident_t* @.loc.dummy, i32 %polly.par.global_tid, i32* %polly.par.lastIterPtr, i64* %polly.par.LBPtr, i64* %polly.par.UBPtr, i64* %polly.par.StridePtr)
; LIBOMP-IR-DYNAMIC-NEXT: %polly.hasIteration = icmp eq i32 %{{[0-9]+}}, 1
; LIBOMP-IR-DYNAMIC-NEXT: br i1 %polly.hasIteration, label %polly.par.loadIVBounds, label %polly.par.exit

; LIBOMP-IR-DYNAMIC-LABEL: polly.par.exit:
; LIBOMP-IR-DYNAMIC-NEXT: ret void

; LIBOMP-IR-DYNAMIC-LABEL: polly.par.checkNext:
; LIBOMP-IR-DYNAMIC-NEXT: %{{[0-9]+}} = call i32 @__kmpc_dispatch_next_{{[48]}}(%struct.ident_t* @.loc.dummy, i32 %polly.par.global_tid, i32* %polly.par.lastIterPtr, i64* %polly.par.LBPtr, i64* %polly.par.UBPtr, i64* %polly.par.StridePtr)
; LIBOMP-IR-DYNAMIC-NEXT: %polly.hasWork = icmp eq i32 %{{[0-9]+}}, 1
; LIBOMP-IR-DYNAMIC-NEXT: br i1 %polly.hasWork, label %polly.par.loadIVBounds, label %polly.par.exit

; LIBOMP-IR-DYNAMIC-LABEL: polly.par.loadIVBounds:
; LIBOMP-IR-DYNAMIC-NEXT: %polly.indvar.LB = load i64, i64* %polly.par.LBPtr
; LIBOMP-IR-DYNAMIC-NEXT: %polly.indvar.UB = load i64, i64* %polly.par.UBPtr
; LIBOMP-IR-DYNAMIC-NEXT: br label %polly.loop_preheader

; LIBOMP-IR-DYNAMIC-FOUR: call void @__kmpc_dispatch_init_{{[48]}}(%struct.ident_t* @.loc.dummy, i32 %polly.par.global_tid, i32 35, i64 %polly.kmpc.lb, i64 %polly.indvar.UBAdjusted, i64 %polly.kmpc.inc, i64 4)

; LIBOMP-IR-STRIDE4: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @.loc.dummy, i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i8*)* @single_parallel_loop_polly_subfn to void (i32*, i32*, ...)*), i64 0, i64 1024, i64 4, i8* %polly.par.userContext1)

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"

; The array written by the loop (float A[1024] in the C sketch above).
@A = common global [1024 x float] zeroinitializer, align 16

; Input for every RUN line: a single counted loop that stores 1.0 into each
; element of @A. The block names (for.i, S, for.inc, exit) are referenced by
; the expected output above (e.g. Stmt_S, polly.stmt.S), so keep them stable.
define void @single_parallel_loop() nounwind {
entry:
  br label %for.i

for.i:
  ; Loop header: %indvar starts at 0 and is advanced by 1 in %for.inc.
  %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %entry ]
  %scevgep = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %indvar
  ; Stay in the loop while %indvar != 1024.
  %exitcond = icmp ne i64 %indvar, 1024
  br i1 %exitcond, label %S, label %exit

S:
  ; Loop body: A[indvar] = 1.0
  store float 1.0, float* %scevgep
  br label %for.inc

for.inc:
  %indvar.next = add i64 %indvar, 1
  br label %for.i

exit:
  ret void
}