1; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-print-ast -disable-output < %s | FileCheck %s -check-prefix=AST
2; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-codegen -S -verify-dom-info < %s | FileCheck %s -check-prefix=IR
3
4; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-import-jscop -polly-print-ast -disable-output < %s | FileCheck %s -check-prefix=AST-STRIDE4
5; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-import-jscop -polly-codegen -S < %s | FileCheck %s -check-prefix=IR-STRIDE4
6
7; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-codegen -polly-omp-backend=LLVM -polly-scheduling=static -polly-scheduling-chunksize=43 -S -verify-dom-info < %s | FileCheck %s -check-prefix=LIBOMP-IR-STATIC-CHUNKED
8; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-codegen -polly-omp-backend=LLVM -polly-scheduling=static -S -verify-dom-info < %s | FileCheck %s -check-prefix=LIBOMP-IR-STATIC
9; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-codegen -polly-omp-backend=LLVM -polly-scheduling=dynamic -S -verify-dom-info < %s | FileCheck %s -check-prefix=LIBOMP-IR-DYNAMIC
10; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-codegen -polly-omp-backend=LLVM -polly-scheduling=dynamic -polly-scheduling-chunksize=4 -S -verify-dom-info < %s | FileCheck %s -check-prefix=LIBOMP-IR-DYNAMIC-FOUR
11; RUN: opt %loadPolly -polly-parallel -polly-parallel-force -polly-import-jscop -polly-codegen -polly-omp-backend=LLVM -S < %s | FileCheck %s -check-prefix=LIBOMP-IR-STRIDE4
12
; This extensive test case checks the creation of the full set of OpenMP calls
; as well as the subfunction creation, using a trivial loop as an example.
15;
16; #define N 1024
17; float A[N];
18;
19; void single_parallel_loop(void) {
20;   for (long i = 0; i < N; i++)
21;     A[i] = 1;
22; }
23
24; AST: #pragma simd
25; AST: #pragma omp parallel for
26; AST: for (int c0 = 0; c0 <= 1023; c0 += 1)
27; AST:   Stmt_S(c0);
28
29; AST-STRIDE4: #pragma omp parallel for
30; AST-STRIDE4: for (int c0 = 0; c0 <= 1023; c0 += 4)
31; AST-STRIDE4:   #pragma simd
32; AST-STRIDE4:   for (int c1 = c0; c1 <= c0 + 3; c1 += 1)
33; AST-STRIDE4:     Stmt_S(c1);
34
35; IR-LABEL: single_parallel_loop()
36; IR-NEXT: entry
37; IR-NEXT:   %polly.par.userContext = alloca
38
39; IR-LABEL: polly.parallel.for:
40; IR-NEXT:   %polly.par.userContext1 = bitcast {}* %polly.par.userContext to i8*
41; IR-NEXT:   call void @GOMP_parallel_loop_runtime_start(void (i8*)* @single_parallel_loop_polly_subfn, i8* %polly.par.userContext1, i32 0, i64 0, i64 1024, i64 1)
42; IR-NEXT:   call void @single_parallel_loop_polly_subfn(i8* %polly.par.userContext1)
43; IR-NEXT:   call void @GOMP_parallel_end()
44; IR-NEXT:   br label %polly.exiting
45
46; IR: define internal void @single_parallel_loop_polly_subfn(i8* %polly.par.userContext) #1
47; IR-LABEL: polly.par.setup:
48; IR-NEXT:   %polly.par.LBPtr = alloca i64
49; IR-NEXT:   %polly.par.UBPtr = alloca i64
50; IR-NEXT:   %polly.par.userContext1 =
51; IR:   br label %polly.par.checkNext
52
53; IR-LABEL: polly.par.exit:
54; IR-NEXT:   call void @GOMP_loop_end_nowait()
55; IR-NEXT:   ret void
56
57; IR-LABEL: polly.par.checkNext:
58; IR-NEXT:   %[[parnext:[._a-zA-Z0-9]*]] = call i8 @GOMP_loop_runtime_next(i64* %polly.par.LBPtr, i64* %polly.par.UBPtr)
59; IR-NEXT:   %[[cmp:[._a-zA-Z0-9]*]] = icmp ne i8 %[[parnext]], 0
60; IR-NEXT:   br i1 %[[cmp]], label %polly.par.loadIVBounds, label %polly.par.exit
61
62; IR-LABEL: polly.par.loadIVBounds:
63; IR-NEXT:   %polly.par.LB = load i64, i64* %polly.par.LBPtr
64; IR-NEXT:   %polly.par.UB = load i64, i64* %polly.par.UBPtr
65; IR-NEXT:   %polly.par.UBAdjusted = sub i64 %polly.par.UB, 1
66; IR-NEXT:   br label %polly.loop_preheader
67
68; IR-LABEL: polly.loop_exit:
69; IR-NEXT:   br label %polly.par.checkNext
70
71; IR-LABEL: polly.loop_header:
72; IR-NEXT:   %polly.indvar = phi i64 [ %polly.par.LB, %polly.loop_preheader ], [ %polly.indvar_next, %polly.stmt.S ]
73; IR-NEXT:   br label %polly.stmt.S
74
75; IR-LABEL: polly.stmt.S:
76; IR-NEXT:   %[[gep:[._a-zA-Z0-9]*]] = getelementptr [1024 x float], [1024 x float]* {{.*}}, i64 0, i64 %polly.indvar
77; IR-NEXT:   store float 1.000000e+00, float* %[[gep]]
78; IR-NEXT:   %polly.indvar_next = add nsw i64 %polly.indvar, 1
79; IR-NEXT:   %polly.loop_cond = icmp sle i64 %polly.indvar_next, %polly.par.UBAdjusted
80; IR-NEXT:   br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit
81
82; IR-LABEL: polly.loop_preheader:
83; IR-NEXT:   br label %polly.loop_header
84
85; IR: attributes #1 = { "polly.skip.fn" }
86
87; IR-STRIDE4:   call void @GOMP_parallel_loop_runtime_start(void (i8*)* @single_parallel_loop_polly_subfn, i8* %polly.par.userContext1, i32 0, i64 0, i64 1024, i64 4)
88; IR-STRIDE4:  add nsw i64 %polly.indvar, 3
89; IR-STRIDE4:  %polly.indvar_next = add nsw i64 %polly.indvar, 4
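; NOTE(review): the next line has no colon after its check prefix, so FileCheck
; does not treat it as a directive and it is never verified. Confirm whether the
; check is obsolete (drop it) or should be re-enabled and updated.
; IR-STRIDE4   %polly.adjust_ub = sub i64 %polly.par.UBAdjusted, 4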
91
92; LIBOMP-IR-STATIC-CHUNKED: %struct.ident_t = type { i32, i32, i32, i32, i8* }
93
94; LIBOMP-IR-STATIC-CHUNKED-LABEL: single_parallel_loop()
95; LIBOMP-IR-STATIC-CHUNKED-NEXT: entry
96; LIBOMP-IR-STATIC-CHUNKED-NEXT:   %polly.par.userContext = alloca
97
98; LIBOMP-IR-STATIC-CHUNKED-LABEL: polly.parallel.for:
99; LIBOMP-IR-STATIC-CHUNKED-NEXT:   %polly.par.userContext1 = bitcast {}* %polly.par.userContext to i8*
100; LIBOMP-IR-STATIC-CHUNKED-NEXT:   call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @.loc.dummy, i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i8*)* @single_parallel_loop_polly_subfn to void (i32*, i32*, ...)*), i64 0, i64 1024, i64 1, i8* %polly.par.userContext1)
101; LIBOMP-IR-STATIC-CHUNKED-NEXT:   br label %polly.exiting
102
103; LIBOMP-IR-STATIC-CHUNKED: define internal void @single_parallel_loop_polly_subfn(i32* %polly.kmpc.global_tid, i32* %polly.kmpc.bound_tid, i64 %polly.kmpc.lb, i64 %polly.kmpc.ub, i64 %polly.kmpc.inc, i8* %polly.kmpc.shared)
104; LIBOMP-IR-STATIC-CHUNKED-LABEL: polly.par.setup:
105; LIBOMP-IR-STATIC-CHUNKED-NEXT:   %polly.par.LBPtr = alloca i64
106; LIBOMP-IR-STATIC-CHUNKED-NEXT:   %polly.par.UBPtr = alloca i64
107; LIBOMP-IR-STATIC-CHUNKED-NEXT:   %polly.par.lastIterPtr = alloca i32
108; LIBOMP-IR-STATIC-CHUNKED-NEXT:   %polly.par.StridePtr = alloca i64
109; LIBOMP-IR-STATIC-CHUNKED-NEXT:   %polly.par.userContext = bitcast i8* %polly.kmpc.shared
110; LIBOMP-IR-STATIC-CHUNKED-NEXT:   %polly.par.global_tid = load i32, i32* %polly.kmpc.global_tid
111; LIBOMP-IR-STATIC-CHUNKED-NEXT:   store i64 %polly.kmpc.lb, i64* %polly.par.LBPtr
112; LIBOMP-IR-STATIC-CHUNKED-NEXT:   store i64 %polly.kmpc.ub, i64* %polly.par.UBPtr
113; LIBOMP-IR-STATIC-CHUNKED-NEXT:   store i32 0, i32* %polly.par.lastIterPtr
114; LIBOMP-IR-STATIC-CHUNKED-NEXT:   store i64 %polly.kmpc.inc, i64* %polly.par.StridePtr
115; LIBOMP-IR-STATIC-CHUNKED-NEXT:   %polly.indvar.UBAdjusted = add i64 %polly.kmpc.ub, -1
116; LIBOMP-IR-STATIC-CHUNKED-NEXT:   store i64 %polly.indvar.UBAdjusted, i64* %polly.par.UBPtr, align 8
117; LIBOMP-IR-STATIC-CHUNKED-NEXT:   call void @__kmpc_for_static_init_{{[4|8]}}(%struct.ident_t* @.loc.dummy{{[.0-9]*}}, i32 %polly.par.global_tid, i32 33, i32* %polly.par.lastIterPtr, i64* %polly.par.LBPtr, i64* %polly.par.UBPtr, i64* %polly.par.StridePtr, i64 1, i64 43)
118; LIBOMP-IR-STATIC-CHUNKED-NEXT:   %polly.kmpc.stride = load i64, i64* %polly.par.StridePtr, align 8
119; LIBOMP-IR-STATIC-CHUNKED-NEXT:   %polly.indvar.LB = load i64, i64* %polly.par.LBPtr
120; LIBOMP-IR-STATIC-CHUNKED-NEXT:   %polly.indvar.UB.temp = load i64, i64* %polly.par.UBPtr
121; LIBOMP-IR-STATIC-CHUNKED-NEXT:   %polly.indvar.UB.inRange = icmp sle i64 %polly.indvar.UB.temp, %polly.indvar.UBAdjusted
122; LIBOMP-IR-STATIC-CHUNKED-NEXT:   %polly.indvar.UB = select i1 %polly.indvar.UB.inRange, i64 %polly.indvar.UB.temp, i64 %polly.indvar.UBAdjusted
123; LIBOMP-IR-STATIC-CHUNKED-NEXT:   store i64 %polly.indvar.UB, i64* %polly.par.UBPtr, align 8
124; LIBOMP-IR-STATIC-CHUNKED-NEXT:   %polly.hasIteration = icmp sle i64 %polly.indvar.LB, %polly.indvar.UB
125; LIBOMP-IR-STATIC-CHUNKED:   br i1 %polly.hasIteration, label %polly.par.loadIVBounds, label %polly.par.exit
126
127; LIBOMP-IR-STATIC-CHUNKED-LABEL: polly.par.exit:
128; LIBOMP-IR-STATIC-CHUNKED-NEXT:   call void @__kmpc_for_static_fini(%struct.ident_t* @.loc.dummy, i32 %polly.par.global_tid)
129; LIBOMP-IR-STATIC-CHUNKED-NEXT:   ret void
130
131; LIBOMP-IR-STATIC-CHUNKED-LABEL: polly.par.checkNext:
132; LIBOMP-IR-STATIC-CHUNKED-NEXT:   %polly.indvar.nextLB = add i64 %polly.indvar.LB.entry, %polly.kmpc.stride
133; LIBOMP-IR-STATIC-CHUNKED-NEXT:   %{{[0-9]+}} = add i64 %polly.indvar.UB.entry, %polly.kmpc.stride
134; LIBOMP-IR-STATIC-CHUNKED-NEXT:   %polly.indvar.nextUB.outOfBounds = icmp sgt i64 %{{[0-9]+}}, %polly.indvar.UBAdjusted
135; LIBOMP-IR-STATIC-CHUNKED-NEXT:   %polly.indvar.nextUB = select i1 %polly.indvar.nextUB.outOfBounds, i64 %polly.indvar.UBAdjusted, i64 %{{[0-9]+}}
136; LIBOMP-IR-STATIC-CHUNKED-NEXT:   store i64 %polly.indvar.nextLB, i64* %polly.par.LBPtr, align 8
137; LIBOMP-IR-STATIC-CHUNKED-NEXT:   store i64 %polly.indvar.nextUB, i64* %polly.par.UBPtr, align 8
138; LIBOMP-IR-STATIC-CHUNKED-NEXT:   %polly.hasWork = icmp sle i64 %polly.indvar.nextLB, %polly.indvar.UBAdjusted
139; LIBOMP-IR-STATIC-CHUNKED-NEXT:   br i1 %polly.hasWork, label %polly.par.loadIVBounds, label %polly.par.exit
140
141; LIBOMP-IR-STATIC-CHUNKED-LABEL: polly.par.loadIVBounds:
142; LIBOMP-IR-STATIC-CHUNKED-NEXT:   %polly.indvar.LB.entry = load i64, i64* %polly.par.LBPtr, align 8
143; LIBOMP-IR-STATIC-CHUNKED-NEXT:   %polly.indvar.UB.entry = load i64, i64* %polly.par.UBPtr, align 8
144; LIBOMP-IR-STATIC-CHUNKED-NEXT:   br label %polly.loop_preheader
145
146; LIBOMP-IR-STATIC-CHUNKED-LABEL: polly.loop_exit:
147; LIBOMP-IR-STATIC-CHUNKED-NEXT:   br label %polly.par.checkNext
148
149; LIBOMP-IR-STATIC-CHUNKED-LABEL: polly.loop_header:
150; LIBOMP-IR-STATIC-CHUNKED-NEXT:   %polly.indvar = phi i64 [ %polly.indvar.LB.entry, %polly.loop_preheader ], [ %polly.indvar_next, %polly.stmt.S ]
151; LIBOMP-IR-STATIC-CHUNKED-NEXT:   br label %polly.stmt.S
152
153; LIBOMP-IR-STATIC-CHUNKED-LABEL: polly.stmt.S:
154; LIBOMP-IR-STATIC-CHUNKED-NEXT:   %[[gep:[._a-zA-Z0-9]*]] = getelementptr [1024 x float], [1024 x float]* {{.*}}, i64 0, i64 %polly.indvar
155; LIBOMP-IR-STATIC-CHUNKED-NEXT:   store float 1.000000e+00, float* %[[gep]]
156; LIBOMP-IR-STATIC-CHUNKED-NEXT:   %polly.indvar_next = add nsw i64 %polly.indvar, 1
157; LIBOMP-IR-STATIC-CHUNKED-NEXT:   %polly.loop_cond = icmp sle i64 %polly.indvar_next, %polly.indvar.UB.entry
158; LIBOMP-IR-STATIC-CHUNKED-NEXT:   br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit
159
160; LIBOMP-IR-STATIC-CHUNKED-LABEL: polly.loop_preheader:
161; LIBOMP-IR-STATIC-CHUNKED-NEXT:   br label %polly.loop_header
162
163; LIBOMP-IR-STATIC-CHUNKED: attributes #1 = { "polly.skip.fn" }
164
165; LIBOMP-IR-STATIC: define internal void @single_parallel_loop_polly_subfn(i32* %polly.kmpc.global_tid, i32* %polly.kmpc.bound_tid, i64 %polly.kmpc.lb, i64 %polly.kmpc.ub, i64 %polly.kmpc.inc, i8* %polly.kmpc.shared)
166; LIBOMP-IR-STATIC-LABEL: polly.par.setup:
167; LIBOMP-IR-STATIC:   call void @__kmpc_for_static_init_{{[4|8]}}(%struct.ident_t* @.loc.dummy{{[.0-9]*}}, i32 %polly.par.global_tid, i32 34, i32* %polly.par.lastIterPtr, i64* %polly.par.LBPtr, i64* %polly.par.UBPtr, i64* %polly.par.StridePtr, i64 1, i64 1)
168; LIBOMP-IR-STATIC:   br i1 %polly.hasIteration, label %polly.par.loadIVBounds, label %polly.par.exit
169
170; LIBOMP-IR-STATIC-LABEL: polly.par.exit:
171; LIBOMP-IR-STATIC-NEXT:   call void @__kmpc_for_static_fini(%struct.ident_t* @.loc.dummy, i32 %polly.par.global_tid)
172; LIBOMP-IR-STATIC-NEXT:   ret void
173
174; LIBOMP-IR-STATIC-LABEL: polly.par.checkNext:
175; LIBOMP-IR-STATIC-NEXT:   br label %polly.par.exit
176
177; LIBOMP-IR-STATIC-LABEL: polly.par.loadIVBounds:
178; LIBOMP-IR-STATIC-NEXT:   br label %polly.loop_preheader
179
180; LIBOMP-IR-STATIC-LABEL: polly.loop_exit:
181; LIBOMP-IR-STATIC-NEXT:   br label %polly.par.checkNext
182
183; LIBOMP-IR-STATIC-LABEL: polly.loop_header:
184; LIBOMP-IR-STATIC-NEXT:   %polly.indvar = phi i64 [ %polly.indvar.LB, %polly.loop_preheader ], [ %polly.indvar_next, %polly.stmt.S ]
185; LIBOMP-IR-STATIC-NEXT:   br label %polly.stmt.S
186
187; LIBOMP-IR-STATIC-LABEL: polly.stmt.S:
188; LIBOMP-IR-STATIC-NEXT:   %[[gep:[._a-zA-Z0-9]*]] = getelementptr [1024 x float], [1024 x float]* {{.*}}, i64 0, i64 %polly.indvar
189; LIBOMP-IR-STATIC-NEXT:   store float 1.000000e+00, float* %[[gep]]
190; LIBOMP-IR-STATIC-NEXT:   %polly.indvar_next = add nsw i64 %polly.indvar, 1
191; LIBOMP-IR-STATIC-NEXT:   %polly.loop_cond = icmp sle i64 %polly.indvar_next, %polly.indvar.UB
192; LIBOMP-IR-STATIC-NEXT:   br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit
193
194; LIBOMP-IR-STATIC-LABEL: polly.loop_preheader:
195; LIBOMP-IR-STATIC-NEXT:   br label %polly.loop_header
196
197; LIBOMP-IR-DYNAMIC:   call void @__kmpc_dispatch_init_{{[4|8]}}(%struct.ident_t* @.loc.dummy, i32 %polly.par.global_tid, i32 35, i64 %polly.kmpc.lb, i64 %polly.indvar.UBAdjusted, i64 %polly.kmpc.inc, i64 1)
198; LIBOMP-IR-DYNAMIC-NEXT:   %{{[0-9]+}} = call i32 @__kmpc_dispatch_next_{{[4|8]}}(%struct.ident_t* @.loc.dummy, i32 %polly.par.global_tid, i32* %polly.par.lastIterPtr, i64* %polly.par.LBPtr, i64* %polly.par.UBPtr, i64* %polly.par.StridePtr)
199; LIBOMP-IR-DYNAMIC-NEXT:   %polly.hasIteration = icmp eq i32 %{{[0-9]+}}, 1
200; LIBOMP-IR-DYNAMIC-NEXT:   br i1 %polly.hasIteration, label %polly.par.loadIVBounds, label %polly.par.exit
201
202; LIBOMP-IR-DYNAMIC-LABEL: polly.par.exit:
203; LIBOMP-IR-DYNAMIC-NEXT:   ret void
204
205; LIBOMP-IR-DYNAMIC-LABEL: polly.par.checkNext:
206; LIBOMP-IR-DYNAMIC-NEXT:   %{{[0-9]+}} = call i32 @__kmpc_dispatch_next_{{[4|8]}}(%struct.ident_t* @.loc.dummy, i32 %polly.par.global_tid, i32* %polly.par.lastIterPtr, i64* %polly.par.LBPtr, i64* %polly.par.UBPtr, i64* %polly.par.StridePtr)
207; LIBOMP-IR-DYNAMIC-NEXT:   %polly.hasWork = icmp eq i32 %{{[0-9]+}}, 1
208; LIBOMP-IR-DYNAMIC-NEXT:   br i1 %polly.hasWork, label %polly.par.loadIVBounds, label %polly.par.exit
209
210; LIBOMP-IR-DYNAMIC-LABEL: polly.par.loadIVBounds:
211; LIBOMP-IR-DYNAMIC-NEXT:   %polly.indvar.LB = load i64, i64* %polly.par.LBPtr
212; LIBOMP-IR-DYNAMIC-NEXT:   %polly.indvar.UB = load i64, i64* %polly.par.UBPtr
213; LIBOMP-IR-DYNAMIC-NEXT:   br label %polly.loop_preheader
214
215; LIBOMP-IR-DYNAMIC-FOUR:   call void @__kmpc_dispatch_init_{{[4|8]}}(%struct.ident_t* @.loc.dummy, i32 %polly.par.global_tid, i32 35, i64 %polly.kmpc.lb, i64 %polly.indvar.UBAdjusted, i64 %polly.kmpc.inc, i64 4)
216
217; LIBOMP-IR-STRIDE4:     call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @.loc.dummy, i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i8*)* @single_parallel_loop_polly_subfn to void (i32*, i32*, ...)*), i64 0, i64 1024, i64 4, i8* %polly.par.userContext1)
218
; Little-endian data layout with 64-bit pointers ("e", "p:64:64:64").
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"

; The 1024-element float array that the loop below fills.
@A = common global [1024 x float] zeroinitializer, align 16

; LLVM form of the C loop quoted in the header comment of this file:
;
;   for (long i = 0; i < 1024; i++)
;     A[i] = 1;
;
; The block label S must keep its name: the expected output above refers to it
; as Stmt_S and polly.stmt.S.
; NOTE(review): the invocations using -polly-import-jscop presumably load a
; sidecar file keyed on the function and block names; confirm before renaming
; the function or any label.
define void @single_parallel_loop() nounwind {
entry:
  br label %for.i

for.i:
  ; %i starts at 0 and is advanced by one in for.inc.
  %i = phi i64 [ 0, %entry ], [ %i.next, %for.inc ]
  ; Address of A[i]; consumed by the store in block S.
  %elt.ptr = getelementptr [1024 x float], [1024 x float]* @A, i64 0, i64 %i
  ; Stay in the loop while i != 1024.
  %in.bounds = icmp ne i64 %i, 1024
  br i1 %in.bounds, label %S, label %exit

S:
  ; The single statement of the loop body: A[i] = 1.0f.
  store float 1.000000e+00, float* %elt.ptr
  br label %for.inc

for.inc:
  %i.next = add i64 %i, 1
  br label %for.i

exit:
  ret void
}
244