1; RUN: opt -passes=openmp-opt -pass-remarks=openmp-opt -pass-remarks-missed=openmp-opt -pass-remarks-analysis=openmp-opt -disable-output < %s 2>&1 | FileCheck %s
2target triple = "nvptx64"
3
4; CHECK: remark: llvm/test/Transforms/OpenMP/spmdization_remarks.c:13:5: Value has potential side effects preventing SPMD-mode execution. Add `__attribute__((assume("ompx_spmd_amenable")))` to the called function to override.
5; CHECK: remark: llvm/test/Transforms/OpenMP/spmdization_remarks.c:15:5: Value has potential side effects preventing SPMD-mode execution. Add `__attribute__((assume("ompx_spmd_amenable")))` to the called function to override.
6; CHECK: remark: llvm/test/Transforms/OpenMP/spmdization_remarks.c:11:1: Generic-mode kernel is executed with a customized state machine that requires a fallback.
7; CHECK: remark: llvm/test/Transforms/OpenMP/spmdization_remarks.c:13:5: Call may contain unknown parallel regions. Use `__attribute__((assume("omp_no_parallelism")))` to override.
8; CHECK: remark: llvm/test/Transforms/OpenMP/spmdization_remarks.c:15:5: Call may contain unknown parallel regions. Use `__attribute__((assume("omp_no_parallelism")))` to override.
9; CHECK: remark: llvm/test/Transforms/OpenMP/spmdization_remarks.c:20:1: Transformed generic-mode kernel to SPMD-mode.
10
11
12;; void unknown(void);
13;; void known(void) {
14;;   #pragma omp parallel
15;;   {
16;;     unknown();
17;;   }
18;; }
19;;
20;; void test_fallback(void) {
21;;   #pragma omp target teams
22;;   {
23;;     unknown();
24;;     known();
25;;     unknown();
26;;   }
27;; }
28;;
29;; void no_openmp(void) __attribute__((assume("omp_no_openmp")));
30;; void test_no_fallback(void) {
31;;   #pragma omp target teams
32;;   {
33;;     known();
34;;     known();
35;;     known();
36;;     spmd_amenable();
37;;   }
38;; }
39
40%struct.ident_t = type { i32, i32, i32, i32, i8* }
41
42@0 = private unnamed_addr constant [103 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;1;;\00", align 1
43@1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([103 x i8], [103 x i8]* @0, i32 0, i32 0) }, align 8
44@2 = private unnamed_addr constant [72 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;test_fallback;11;1;;\00", align 1
45@3 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([72 x i8], [72 x i8]* @2, i32 0, i32 0) }, align 8
46@4 = private unnamed_addr constant [104 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;__omp_offloading_2a_d80d3d_test_fallback_l11;11;25;;\00", align 1
47@5 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([104 x i8], [104 x i8]* @4, i32 0, i32 0) }, align 8
48@__omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode = weak constant i8 1
49@6 = private unnamed_addr constant [106 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;1;;\00", align 1
50@7 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([106 x i8], [106 x i8]* @6, i32 0, i32 0) }, align 8
51@8 = private unnamed_addr constant [75 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;test_no_fallback;20;1;;\00", align 1
52@9 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([75 x i8], [75 x i8]* @8, i32 0, i32 0) }, align 8
53@10 = private unnamed_addr constant [107 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;__omp_offloading_2a_d80d3d_test_no_fallback_l20;20;25;;\00", align 1
54@11 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([107 x i8], [107 x i8]* @10, i32 0, i32 0) }, align 8
55@__omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode = weak constant i8 1
56@12 = private unnamed_addr constant [63 x i8] c";llvm/test/Transforms/OpenMP/spmdization_remarks.c;known;4;1;;\00", align 1
57@13 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 2, i32 0, i8* getelementptr inbounds ([63 x i8], [63 x i8]* @12, i32 0, i32 0) }, align 8
58@G = external global i32
59@llvm.compiler.used = appending global [2 x i8*] [i8* @__omp_offloading_2a_d80d3d_test_fallback_l11_exec_mode, i8* @__omp_offloading_2a_d80d3d_test_no_fallback_l20_exec_mode], section "llvm.metadata"
60
61; Function Attrs: convergent norecurse nounwind
62define weak void @__omp_offloading_2a_d80d3d_test_fallback_l11() local_unnamed_addr #0 !dbg !15 {
63entry:
64  %captured_vars_addrs.i.i = alloca [0 x i8*], align 8
65  %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @1, i8 1, i1 true, i1 true) #3, !dbg !18
66  %exec_user_code = icmp eq i32 %0, -1, !dbg !18
67  br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !18
68
69common.ret:                                       ; preds = %entry, %user_code.entry
70  ret void, !dbg !19
71
72user_code.entry:                                  ; preds = %entry
73  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @3) #3
74  call void @unknown() #6, !dbg !20
75  %2 = bitcast [0 x i8*]* %captured_vars_addrs.i.i to i8*
76  call void @llvm.lifetime.start.p0i8(i64 0, i8* nonnull %2) #3
77  %3 = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @13) #3
78  %4 = getelementptr inbounds [0 x i8*], [0 x i8*]* %captured_vars_addrs.i.i, i64 0, i64 0, !dbg !23
79  call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %3, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !23
80  call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !26
81  call void @unknown() #6, !dbg !27
82  call void @__kmpc_target_deinit(%struct.ident_t* nonnull @5, i8 1, i1 true) #3, !dbg !28
83  br label %common.ret
84}
85
86declare i32 @__kmpc_target_init(%struct.ident_t*, i8, i1, i1) local_unnamed_addr
87
88; Function Attrs: convergent
89declare void @unknown() local_unnamed_addr #1
90
91; Function Attrs: nounwind
92define hidden void @known() local_unnamed_addr #2 !dbg !29 {
93entry:
94  %captured_vars_addrs = alloca [0 x i8*], align 8
95  %0 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @13)
96  %1 = getelementptr inbounds [0 x i8*], [0 x i8*]* %captured_vars_addrs, i64 0, i64 0, !dbg !30
97  call void @__kmpc_parallel_51(%struct.ident_t* nonnull @13, i32 %0, i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** nonnull %1, i64 0) #3, !dbg !30
98  ret void, !dbg !31
99}
100
101; Function Attrs: nounwind
102declare i32 @__kmpc_global_thread_num(%struct.ident_t*) local_unnamed_addr #3
103
104declare void @__kmpc_target_deinit(%struct.ident_t*, i8, i1) local_unnamed_addr
105
106; Function Attrs: norecurse nounwind
107define weak void @__omp_offloading_2a_d80d3d_test_no_fallback_l20() local_unnamed_addr #4 !dbg !32 {
108entry:
109  %captured_vars_addrs.i2.i = alloca [0 x i8*], align 8
110  %0 = call i32 @__kmpc_target_init(%struct.ident_t* nonnull @7, i8 1, i1 true, i1 true) #3, !dbg !33
111  %exec_user_code = icmp eq i32 %0, -1, !dbg !33
112  br i1 %exec_user_code, label %user_code.entry, label %common.ret, !dbg !33
113
114common.ret:                                       ; preds = %entry, %user_code.entry
115  ret void, !dbg !34
116
117user_code.entry:                                  ; preds = %entry
118  %1 = call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @9) #3
119  %2 = bitcast [0 x i8*]* %captured_vars_addrs.i2.i to i8*
120  call void @llvm.lifetime.start.p0i8(i64 0, i8* nonnull %2) #3
121  %3 = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @13) #3
122  %4 = getelementptr inbounds [0 x i8*], [0 x i8*]* %captured_vars_addrs.i2.i, i64 0, i64 0, !dbg !35
123  call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %3, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !35
124  call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !39
125  call void @llvm.lifetime.start.p0i8(i64 0, i8* nonnull %2) #3
126  %5 = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @13) #3
127  call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %5, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !40
128  call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !42
129  call void @llvm.lifetime.start.p0i8(i64 0, i8* nonnull %2) #3
130  %6 = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull @13) #3
131  call void @__kmpc_parallel_51(%struct.ident_t* noundef nonnull @13, i32 %6, i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef nonnull %4, i64 noundef 0) #3, !dbg !43
132  call void @llvm.lifetime.end.p0i8(i64 0, i8* nonnull %2) #3, !dbg !45
133  call void @spmd_amenable()
134  call void @__kmpc_target_deinit(%struct.ident_t* nonnull @11, i8 1, i1 true) #3, !dbg !46
135  br label %common.ret
136}
137
138; Function Attrs: convergent norecurse nounwind
139define internal void @__omp_outlined__2(i32* noalias nocapture nofree readnone %.global_tid., i32* noalias nocapture nofree readnone %.bound_tid.) #0 !dbg !47 {
140entry:
141  call void @unknown() #6, !dbg !48
142  ret void, !dbg !49
143}
144
145; Function Attrs: convergent norecurse nounwind
146define internal void @__omp_outlined__2_wrapper(i16 zeroext %0, i32 %1) #0 !dbg !50 {
147entry:
148  %global_args = alloca i8**, align 8
149  call void @__kmpc_get_shared_variables(i8*** nonnull %global_args) #3, !dbg !51
150  call void @unknown() #6, !dbg !52
151  ret void, !dbg !51
152}
153
154declare void @__kmpc_get_shared_variables(i8***) local_unnamed_addr
155
156declare void @__kmpc_parallel_51(%struct.ident_t*, i32, i32, i32, i32, i8*, i8*, i8**, i64) local_unnamed_addr
157
158; Function Attrs: argmemonly nofree nosync nounwind willreturn
159declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #5
160
161; Function Attrs: argmemonly nofree nosync nounwind willreturn
162declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #5
163
164declare void @spmd_amenable() #7
165
166attributes #0 = { convergent norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
167attributes #1 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
168attributes #2 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
169attributes #3 = { nounwind }
170attributes #4 = { norecurse nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="sm_53" "target-features"="+ptx32,+sm_53" }
171attributes #5 = { argmemonly nofree nosync nounwind willreturn }
172attributes #6 = { convergent nounwind }
173attributes #7 = { "llvm.assume"="ompx_spmd_amenable" }
174
175!llvm.dbg.cu = !{!0}
176!omp_offload.info = !{!3, !4}
177!nvvm.annotations = !{!5, !6}
178!llvm.module.flags = !{!7, !8, !9, !10, !11, !12, !13}
179!llvm.ident = !{!14}
180
181!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 13.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: DebugDirectivesOnly, enums: !2, splitDebugInlining: false, nameTableKind: None)
182!1 = !DIFile(filename: "spmdization_remarks.c", directory: "/data/src/llvm-project")
183!2 = !{}
184!3 = !{i32 0, i32 42, i32 14159165, !"test_no_fallback", i32 20, i32 1}
185!4 = !{i32 0, i32 42, i32 14159165, !"test_fallback", i32 11, i32 0}
186!5 = !{void ()* @__omp_offloading_2a_d80d3d_test_fallback_l11, !"kernel", i32 1}
187!6 = !{void ()* @__omp_offloading_2a_d80d3d_test_no_fallback_l20, !"kernel", i32 1}
188!7 = !{i32 7, !"Dwarf Version", i32 2}
189!8 = !{i32 2, !"Debug Info Version", i32 3}
190!9 = !{i32 1, !"wchar_size", i32 4}
191!10 = !{i32 7, !"openmp", i32 50}
192!11 = !{i32 7, !"openmp-device", i32 50}
193!12 = !{i32 7, !"PIC Level", i32 2}
194!13 = !{i32 7, !"frame-pointer", i32 2}
195!14 = !{!"clang version 13.0.0"}
196!15 = distinct !DISubprogram(name: "__omp_offloading_2a_d80d3d_test_fallback_l11", scope: !16, file: !16, line: 11, type: !17, scopeLine: 11, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
197!16 = !DIFile(filename: "llvm/test/Transforms/OpenMP/spmdization_remarks.c", directory: "/data/src/llvm-project")
198!17 = !DISubroutineType(types: !2)
199!18 = !DILocation(line: 11, column: 1, scope: !15)
200!19 = !DILocation(line: 0, scope: !15)
201!20 = !DILocation(line: 13, column: 5, scope: !21, inlinedAt: !22)
202!21 = distinct !DISubprogram(name: "__omp_outlined__", scope: !16, file: !16, line: 11, type: !17, scopeLine: 11, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
203!22 = distinct !DILocation(line: 11, column: 1, scope: !15)
204!23 = !DILocation(line: 4, column: 1, scope: !24, inlinedAt: !25)
205!24 = distinct !DISubprogram(name: "known", scope: !16, file: !16, line: 3, type: !17, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
206!25 = distinct !DILocation(line: 14, column: 5, scope: !21, inlinedAt: !22)
207!26 = !DILocation(line: 8, column: 1, scope: !24, inlinedAt: !25)
208!27 = !DILocation(line: 15, column: 5, scope: !21, inlinedAt: !22)
209!28 = !DILocation(line: 11, column: 25, scope: !15)
210!29 = distinct !DISubprogram(name: "known", scope: !16, file: !16, line: 3, type: !17, scopeLine: 3, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
211!30 = !DILocation(line: 4, column: 1, scope: !29)
212!31 = !DILocation(line: 8, column: 1, scope: !29)
213!32 = distinct !DISubprogram(name: "__omp_offloading_2a_d80d3d_test_no_fallback_l20", scope: !16, file: !16, line: 20, type: !17, scopeLine: 20, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
214!33 = !DILocation(line: 20, column: 1, scope: !32)
215!34 = !DILocation(line: 0, scope: !32)
216!35 = !DILocation(line: 4, column: 1, scope: !24, inlinedAt: !36)
217!36 = distinct !DILocation(line: 22, column: 5, scope: !37, inlinedAt: !38)
218!37 = distinct !DISubprogram(name: "__omp_outlined__1", scope: !16, file: !16, line: 20, type: !17, scopeLine: 20, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
219!38 = distinct !DILocation(line: 20, column: 1, scope: !32)
220!39 = !DILocation(line: 8, column: 1, scope: !24, inlinedAt: !36)
221!40 = !DILocation(line: 4, column: 1, scope: !24, inlinedAt: !41)
222!41 = distinct !DILocation(line: 23, column: 5, scope: !37, inlinedAt: !38)
223!42 = !DILocation(line: 8, column: 1, scope: !24, inlinedAt: !41)
224!43 = !DILocation(line: 4, column: 1, scope: !24, inlinedAt: !44)
225!44 = distinct !DILocation(line: 24, column: 5, scope: !37, inlinedAt: !38)
226!45 = !DILocation(line: 8, column: 1, scope: !24, inlinedAt: !44)
227!46 = !DILocation(line: 20, column: 25, scope: !32)
228!47 = distinct !DISubprogram(name: "__omp_outlined__2", scope: !16, file: !16, line: 4, type: !17, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
229!48 = !DILocation(line: 6, column: 5, scope: !47)
230!49 = !DILocation(line: 7, column: 3, scope: !47)
231!50 = distinct !DISubprogram(linkageName: "__omp_outlined__2_wrapper", scope: !16, file: !16, line: 4, type: !17, scopeLine: 4, flags: DIFlagArtificial, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !2)
232!51 = !DILocation(line: 4, column: 1, scope: !50)
233!52 = !DILocation(line: 6, column: 5, scope: !47, inlinedAt: !53)
234!53 = distinct !DILocation(line: 4, column: 1, scope: !50)
235