1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs
2 // RUN: %clang_cc1 -no-opaque-pointers -fopenmp-enable-irbuilder -verify -fopenmp -fopenmp-version=51 -x c -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s
3 // expected-no-diagnostics
4
5 #ifndef HEADER
6 #define HEADER
7
8 // CHECK-LABEL: define {{.*}}@unroll_partial_factor_for_collapse(
9 // CHECK-NEXT: [[ENTRY:.*]]:
10 // CHECK-NEXT: %[[M_ADDR:.+]] = alloca i32, align 4
11 // CHECK-NEXT: %[[A_ADDR:.+]] = alloca float*, align 8
12 // CHECK-NEXT: %[[B_ADDR:.+]] = alloca float*, align 8
13 // CHECK-NEXT: %[[C_ADDR:.+]] = alloca float*, align 8
14 // CHECK-NEXT: %[[D_ADDR:.+]] = alloca float*, align 8
15 // CHECK-NEXT: %[[DOTOMP_IV:.+]] = alloca i64, align 8
16 // CHECK-NEXT: %[[TMP:.+]] = alloca i32, align 4
17 // CHECK-NEXT: %[[TMP1:.+]] = alloca i32, align 4
18 // CHECK-NEXT: %[[DOTCAPTURE_EXPR_:.+]] = alloca i32, align 4
19 // CHECK-NEXT: %[[J:.+]] = alloca i32, align 4
20 // CHECK-NEXT: %[[DOTCAPTURE_EXPR_2:.+]] = alloca i64, align 8
21 // CHECK-NEXT: %[[I:.+]] = alloca i32, align 4
22 // CHECK-NEXT: %[[DOTUNROLLED_IV_J:.+]] = alloca i32, align 4
23 // CHECK-NEXT: %[[DOTOMP_LB:.+]] = alloca i64, align 8
24 // CHECK-NEXT: %[[DOTOMP_UB:.+]] = alloca i64, align 8
25 // CHECK-NEXT: %[[DOTOMP_STRIDE:.+]] = alloca i64, align 8
26 // CHECK-NEXT: %[[DOTOMP_IS_LAST:.+]] = alloca i32, align 4
27 // CHECK-NEXT: %[[I6:.+]] = alloca i32, align 4
28 // CHECK-NEXT: %[[DOTUNROLLED_IV_J7:.+]] = alloca i32, align 4
29 // CHECK-NEXT: %[[DOTUNROLL_INNER_IV_J:.+]] = alloca i32, align 4
30 // CHECK-NEXT: store i32 %[[M:.+]], i32* %[[M_ADDR]], align 4
31 // CHECK-NEXT: store float* %[[A:.+]], float** %[[A_ADDR]], align 8
32 // CHECK-NEXT: store float* %[[B:.+]], float** %[[B_ADDR]], align 8
33 // CHECK-NEXT: store float* %[[C:.+]], float** %[[C_ADDR]], align 8
34 // CHECK-NEXT: store float* %[[D:.+]], float** %[[D_ADDR]], align 8
35 // CHECK-NEXT: %[[TMP0:.+]] = load i32, i32* %[[M_ADDR]], align 4
36 // CHECK-NEXT: store i32 %[[TMP0]], i32* %[[DOTCAPTURE_EXPR_]], align 4
37 // CHECK-NEXT: store i32 0, i32* %[[J]], align 4
38 // CHECK-NEXT: %[[TMP1_1:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4
39 // CHECK-NEXT: %[[SUB:.+]] = sub nsw i32 %[[TMP1_1]], 0
40 // CHECK-NEXT: %[[DIV:.+]] = sdiv i32 %[[SUB]], 1
41 // CHECK-NEXT: %[[CONV:.+]] = sext i32 %[[DIV]] to i64
42 // CHECK-NEXT: %[[MUL:.+]] = mul nsw i64 %[[CONV]], 2
43 // CHECK-NEXT: %[[SUB3:.+]] = sub nsw i64 %[[MUL]], 1
44 // CHECK-NEXT: store i64 %[[SUB3]], i64* %[[DOTCAPTURE_EXPR_2]], align 8
45 // CHECK-NEXT: store i32 0, i32* %[[I]], align 4
46 // CHECK-NEXT: store i32 0, i32* %[[DOTUNROLLED_IV_J]], align 4
47 // CHECK-NEXT: %[[TMP2:.+]] = load i32, i32* %[[DOTCAPTURE_EXPR_]], align 4
48 // CHECK-NEXT: %[[CMP:.+]] = icmp slt i32 0, %[[TMP2]]
49 // CHECK-NEXT: br i1 %[[CMP]], label %[[OMP_PRECOND_THEN:.+]], label %[[OMP_PRECOND_END:.+]]
50 // CHECK-EMPTY:
51 // CHECK-NEXT: [[OMP_PRECOND_THEN]]:
52 // CHECK-NEXT: store i64 0, i64* %[[DOTOMP_LB]], align 8
53 // CHECK-NEXT: %[[TMP3:.+]] = load i64, i64* %[[DOTCAPTURE_EXPR_2]], align 8
54 // CHECK-NEXT: store i64 %[[TMP3]], i64* %[[DOTOMP_UB]], align 8
55 // CHECK-NEXT: store i64 1, i64* %[[DOTOMP_STRIDE]], align 8
56 // CHECK-NEXT: store i32 0, i32* %[[DOTOMP_IS_LAST]], align 4
57 // CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @3)
58 // CHECK-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM]], i32 34, i32* %[[DOTOMP_IS_LAST]], i64* %[[DOTOMP_LB]], i64* %[[DOTOMP_UB]], i64* %[[DOTOMP_STRIDE]], i64 1, i64 1)
59 // CHECK-NEXT: %[[TMP4:.+]] = load i64, i64* %[[DOTOMP_UB]], align 8
60 // CHECK-NEXT: %[[TMP5:.+]] = load i64, i64* %[[DOTCAPTURE_EXPR_2]], align 8
61 // CHECK-NEXT: %[[CMP8:.+]] = icmp sgt i64 %[[TMP4]], %[[TMP5]]
62 // CHECK-NEXT: br i1 %[[CMP8]], label %[[COND_TRUE:.+]], label %[[COND_FALSE:.+]]
63 // CHECK-EMPTY:
64 // CHECK-NEXT: [[COND_TRUE]]:
65 // CHECK-NEXT: %[[TMP6:.+]] = load i64, i64* %[[DOTCAPTURE_EXPR_2]], align 8
66 // CHECK-NEXT: br label %[[COND_END:.+]]
67 // CHECK-EMPTY:
68 // CHECK-NEXT: [[COND_FALSE]]:
69 // CHECK-NEXT: %[[TMP7:.+]] = load i64, i64* %[[DOTOMP_UB]], align 8
70 // CHECK-NEXT: br label %[[COND_END]]
71 // CHECK-EMPTY:
72 // CHECK-NEXT: [[COND_END]]:
73 // CHECK-NEXT: %[[COND:.+]] = phi i64 [ %[[TMP6]], %[[COND_TRUE]] ], [ %[[TMP7]], %[[COND_FALSE]] ]
74 // CHECK-NEXT: store i64 %[[COND]], i64* %[[DOTOMP_UB]], align 8
75 // CHECK-NEXT: %[[TMP8:.+]] = load i64, i64* %[[DOTOMP_LB]], align 8
76 // CHECK-NEXT: store i64 %[[TMP8]], i64* %[[DOTOMP_IV]], align 8
77 // CHECK-NEXT: br label %[[OMP_INNER_FOR_COND:.+]]
78 // CHECK-EMPTY:
79 // CHECK-NEXT: [[OMP_INNER_FOR_COND]]:
80 // CHECK-NEXT: %[[TMP9:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8
81 // CHECK-NEXT: %[[TMP10:.+]] = load i64, i64* %[[DOTOMP_UB]], align 8
82 // CHECK-NEXT: %[[CMP10:.+]] = icmp sle i64 %[[TMP9]], %[[TMP10]]
83 // CHECK-NEXT: br i1 %[[CMP10]], label %[[OMP_INNER_FOR_BODY:.+]], label %[[OMP_INNER_FOR_END:.+]]
84 // CHECK-EMPTY:
85 // CHECK-NEXT: [[OMP_INNER_FOR_BODY]]:
86 // CHECK-NEXT: %[[TMP11:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8
87 // CHECK-NEXT: %[[DIV12:.+]] = sdiv i64 %[[TMP11]], 2
88 // CHECK-NEXT: %[[MUL13:.+]] = mul nsw i64 %[[DIV12]], 1
89 // CHECK-NEXT: %[[ADD:.+]] = add nsw i64 0, %[[MUL13]]
90 // CHECK-NEXT: %[[CONV14:.+]] = trunc i64 %[[ADD]] to i32
91 // CHECK-NEXT: store i32 %[[CONV14]], i32* %[[I6]], align 4
92 // CHECK-NEXT: %[[TMP12:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8
93 // CHECK-NEXT: %[[TMP13:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8
94 // CHECK-NEXT: %[[DIV15:.+]] = sdiv i64 %[[TMP13]], 2
95 // CHECK-NEXT: %[[MUL16:.+]] = mul nsw i64 %[[DIV15]], 2
96 // CHECK-NEXT: %[[SUB17:.+]] = sub nsw i64 %[[TMP12]], %[[MUL16]]
97 // CHECK-NEXT: %[[MUL18:.+]] = mul nsw i64 %[[SUB17]], 4
98 // CHECK-NEXT: %[[ADD19:.+]] = add nsw i64 0, %[[MUL18]]
99 // CHECK-NEXT: %[[CONV20:.+]] = trunc i64 %[[ADD19]] to i32
100 // CHECK-NEXT: store i32 %[[CONV20]], i32* %[[DOTUNROLLED_IV_J7]], align 4
101 // CHECK-NEXT: %[[TMP14:.+]] = load i32, i32* %[[DOTUNROLLED_IV_J7]], align 4
102 // CHECK-NEXT: store i32 %[[TMP14]], i32* %[[DOTUNROLL_INNER_IV_J]], align 4
103 // CHECK-NEXT: br label %[[FOR_COND:.+]]
104 // CHECK-EMPTY:
105 // CHECK-NEXT: [[FOR_COND]]:
106 // CHECK-NEXT: %[[TMP15:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4
107 // CHECK-NEXT: %[[TMP16:.+]] = load i32, i32* %[[DOTUNROLLED_IV_J7]], align 4
108 // CHECK-NEXT: %[[ADD21:.+]] = add nsw i32 %[[TMP16]], 4
109 // CHECK-NEXT: %[[CMP22:.+]] = icmp slt i32 %[[TMP15]], %[[ADD21]]
110 // CHECK-NEXT: br i1 %[[CMP22]], label %[[LAND_RHS:.+]], label %[[LAND_END:.+]]
111 // CHECK-EMPTY:
112 // CHECK-NEXT: [[LAND_RHS]]:
113 // CHECK-NEXT: %[[TMP17:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4
114 // CHECK-NEXT: %[[CMP24:.+]] = icmp slt i32 %[[TMP17]], 8
115 // CHECK-NEXT: br label %[[LAND_END]]
116 // CHECK-EMPTY:
117 // CHECK-NEXT: [[LAND_END]]:
118 // CHECK-NEXT: %[[TMP18:.+]] = phi i1 [ false, %[[FOR_COND]] ], [ %[[CMP24]], %[[LAND_RHS]] ]
119 // CHECK-NEXT: br i1 %[[TMP18]], label %[[FOR_BODY:.+]], label %[[FOR_END:.+]]
120 // CHECK-EMPTY:
121 // CHECK-NEXT: [[FOR_BODY]]:
122 // CHECK-NEXT: %[[TMP19:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4
123 // CHECK-NEXT: %[[MUL26:.+]] = mul nsw i32 %[[TMP19]], 1
124 // CHECK-NEXT: %[[ADD27:.+]] = add nsw i32 0, %[[MUL26]]
125 // CHECK-NEXT: store i32 %[[ADD27]], i32* %[[J]], align 4
126 // CHECK-NEXT: %[[TMP20:.+]] = load float*, float** %[[B_ADDR]], align 8
127 // CHECK-NEXT: %[[TMP21:.+]] = load i32, i32* %[[I6]], align 4
128 // CHECK-NEXT: %[[IDXPROM:.+]] = sext i32 %[[TMP21]] to i64
129 // CHECK-NEXT: %[[ARRAYIDX:.+]] = getelementptr inbounds float, float* %[[TMP20]], i64 %[[IDXPROM]]
130 // CHECK-NEXT: %[[TMP22:.+]] = load float, float* %[[ARRAYIDX]], align 4
131 // CHECK-NEXT: %[[TMP23:.+]] = load float*, float** %[[C_ADDR]], align 8
132 // CHECK-NEXT: %[[TMP24:.+]] = load i32, i32* %[[I6]], align 4
133 // CHECK-NEXT: %[[IDXPROM28:.+]] = sext i32 %[[TMP24]] to i64
134 // CHECK-NEXT: %[[ARRAYIDX29:.+]] = getelementptr inbounds float, float* %[[TMP23]], i64 %[[IDXPROM28]]
135 // CHECK-NEXT: %[[TMP25:.+]] = load float, float* %[[ARRAYIDX29]], align 4
136 // CHECK-NEXT: %[[TMP26:.+]] = load float*, float** %[[D_ADDR]], align 8
137 // CHECK-NEXT: %[[TMP27:.+]] = load i32, i32* %[[J]], align 4
138 // CHECK-NEXT: %[[IDXPROM30:.+]] = sext i32 %[[TMP27]] to i64
139 // CHECK-NEXT: %[[ARRAYIDX31:.+]] = getelementptr inbounds float, float* %[[TMP26]], i64 %[[IDXPROM30]]
140 // CHECK-NEXT: %[[TMP28:.+]] = load float, float* %[[ARRAYIDX31]], align 4
141 // CHECK-NEXT: %[[MUL32:.+]] = fmul float %[[TMP25]], %[[TMP28]]
142 // CHECK-NEXT: %[[ADD33:.+]] = fadd float %[[TMP22]], %[[MUL32]]
143 // CHECK-NEXT: %[[TMP29:.+]] = load float*, float** %[[A_ADDR]], align 8
144 // CHECK-NEXT: %[[TMP30:.+]] = load i32, i32* %[[I6]], align 4
145 // CHECK-NEXT: %[[IDXPROM34:.+]] = sext i32 %[[TMP30]] to i64
146 // CHECK-NEXT: %[[ARRAYIDX35:.+]] = getelementptr inbounds float, float* %[[TMP29]], i64 %[[IDXPROM34]]
147 // CHECK-NEXT: %[[TMP31:.+]] = load float, float* %[[ARRAYIDX35]], align 4
148 // CHECK-NEXT: %[[ADD36:.+]] = fadd float %[[TMP31]], %[[ADD33]]
149 // CHECK-NEXT: store float %[[ADD36]], float* %[[ARRAYIDX35]], align 4
150 // CHECK-NEXT: br label %[[FOR_INC:.+]]
151 // CHECK-EMPTY:
152 // CHECK-NEXT: [[FOR_INC]]:
153 // CHECK-NEXT: %[[TMP32:.+]] = load i32, i32* %[[DOTUNROLL_INNER_IV_J]], align 4
154 // CHECK-NEXT: %[[INC:.+]] = add nsw i32 %[[TMP32]], 1
155 // CHECK-NEXT: store i32 %[[INC]], i32* %[[DOTUNROLL_INNER_IV_J]], align 4
156 // CHECK-NEXT: br label %[[FOR_COND]], !llvm.loop ![[LOOP3:[0-9]+]]
157 // CHECK-EMPTY:
158 // CHECK-NEXT: [[FOR_END]]:
159 // CHECK-NEXT: br label %[[OMP_BODY_CONTINUE:.+]]
160 // CHECK-EMPTY:
161 // CHECK-NEXT: [[OMP_BODY_CONTINUE]]:
162 // CHECK-NEXT: br label %[[OMP_INNER_FOR_INC:.+]]
163 // CHECK-EMPTY:
164 // CHECK-NEXT: [[OMP_INNER_FOR_INC]]:
165 // CHECK-NEXT: %[[TMP33:.+]] = load i64, i64* %[[DOTOMP_IV]], align 8
166 // CHECK-NEXT: %[[ADD37:.+]] = add nsw i64 %[[TMP33]], 1
167 // CHECK-NEXT: store i64 %[[ADD37]], i64* %[[DOTOMP_IV]], align 8
168 // CHECK-NEXT: br label %[[OMP_INNER_FOR_COND]]
169 // CHECK-EMPTY:
170 // CHECK-NEXT: [[OMP_INNER_FOR_END]]:
171 // CHECK-NEXT: br label %[[OMP_LOOP_EXIT:.+]]
172 // CHECK-EMPTY:
173 // CHECK-NEXT: [[OMP_LOOP_EXIT]]:
174 // CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM38:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @5)
175 // CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @1, i32 %[[OMP_GLOBAL_THREAD_NUM38]])
176 // CHECK-NEXT: br label %[[OMP_PRECOND_END]]
177 // CHECK-EMPTY:
178 // CHECK-NEXT: [[OMP_PRECOND_END]]:
179 // CHECK-NEXT: %[[OMP_GLOBAL_THREAD_NUM39:.+]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @7)
180 // CHECK-NEXT: call void @__kmpc_barrier(%struct.ident_t* @6, i32 %[[OMP_GLOBAL_THREAD_NUM39]])
181 // CHECK-NEXT: ret void
182 // CHECK-NEXT: }
// Test input: accumulates b[i] + c[i] * d[j] into a[i] for every i in [0, m)
// and every j in [0, 8).
//
// The inner j-loop carries `unroll partial(4)`; the outer `for collapse(2)`
// therefore pairs the i-loop with the loop produced by that unroll directive.
// The FileCheck assertions in this file reflect this: the collapsed iteration
// space is sized m * 2 (last iteration m * 2 - 1), and the remaining inner
// loop is tagged with `llvm.loop.unroll.count 4` metadata.
//
// Keep the loop structure and the statement exactly as written: the
// assertions pin the precise instruction sequence emitted for this body.
void unroll_partial_factor_for_collapse(int m, float *a, float *b, float *c, float *d) {
#pragma omp for collapse(2)
  for (int i = 0; i < m; i++) {
#pragma omp unroll partial(4)
    for (int j = 0; j < 8; j++) {
      a[i] += b[i] + c[i] * d[j];
    }
  }
}
192
193 #endif // HEADER
194
195 // CHECK: ![[META0:[0-9]+]] = !{i32 1, !"wchar_size", i32 4}
196 // CHECK: ![[META1:[0-9]+]] = !{i32 7, !"openmp", i32 51}
197 // CHECK: ![[META2:[0-9]+]] =
198 // CHECK: ![[LOOP3]] = distinct !{![[LOOP3]], ![[LOOPPROP4:[0-9]+]], ![[LOOPPROP5:[0-9]+]]}
199 // CHECK: ![[LOOPPROP4]] = !{!"llvm.loop.mustprogress"}
200 // CHECK: ![[LOOPPROP5]] = !{!"llvm.loop.unroll.count", i32 4}
201