1 //===- IR/OpenMPIRBuilder.h - OpenMP encoding builder for LLVM IR - C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the OpenMPIRBuilder class and helpers used as a convenient
10 // way to create LLVM instructions for OpenMP directives.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
15 #define LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
16 
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/Allocator.h"
#include <forward_list>
#include <utility>
22 
23 namespace llvm {
24 class CanonicalLoopInfo;
25 
26 /// An interface to create LLVM-IR for OpenMP directives.
27 ///
28 /// Each OpenMP directive has a corresponding public generator method.
29 class OpenMPIRBuilder {
30 public:
31   /// Create a new OpenMPIRBuilder operating on the given module \p M. This will
32   /// not have an effect on \p M (see initialize).
OpenMPIRBuilder(Module & M)33   OpenMPIRBuilder(Module &M) : M(M), Builder(M.getContext()) {}
34   ~OpenMPIRBuilder();
35 
36   /// Initialize the internal state; this will put structure types and
37   /// potentially other helpers into the underlying module. Must be called
38   /// before any other method and only once!
39   void initialize();
40 
41   /// Finalize the underlying module, e.g., by outlining regions.
42   /// \param Fn                    The function to be finalized. If not used,
43   ///                              all functions are finalized.
44   /// \param AllowExtractorSinking Flag to include sinking instructions,
45   ///                              emitted by CodeExtractor, in the
46   ///                              outlined region. Default is false.
47   void finalize(Function *Fn = nullptr, bool AllowExtractorSinking = false);
48 
49   /// Add attributes known for \p FnID to \p Fn.
50   void addAttributes(omp::RuntimeFunction FnID, Function &Fn);
51 
52   /// Type used throughout for insertion points.
53   using InsertPointTy = IRBuilder<>::InsertPoint;
54 
55   /// Callback type for variable finalization (think destructors).
56   ///
57   /// \param CodeGenIP is the insertion point at which the finalization code
58   ///                  should be placed.
59   ///
60   /// A finalize callback knows about all objects that need finalization, e.g.
61   /// destruction, when the scope of the currently generated construct is left
62   /// at the time, and location, the callback is invoked.
63   using FinalizeCallbackTy = std::function<void(InsertPointTy CodeGenIP)>;
64 
65   struct FinalizationInfo {
66     /// The finalization callback provided by the last in-flight invocation of
67     /// createXXXX for the directive of kind DK.
68     FinalizeCallbackTy FiniCB;
69 
70     /// The directive kind of the innermost directive that has an associated
71     /// region which might require finalization when it is left.
72     omp::Directive DK;
73 
74     /// Flag to indicate if the directive is cancellable.
75     bool IsCancellable;
76   };
77 
78   /// Push a finalization callback on the finalization stack.
79   ///
80   /// NOTE: Temporary solution until Clang CG is gone.
pushFinalizationCB(const FinalizationInfo & FI)81   void pushFinalizationCB(const FinalizationInfo &FI) {
82     FinalizationStack.push_back(FI);
83   }
84 
85   /// Pop the last finalization callback from the finalization stack.
86   ///
87   /// NOTE: Temporary solution until Clang CG is gone.
popFinalizationCB()88   void popFinalizationCB() { FinalizationStack.pop_back(); }
89 
90   /// Callback type for body (=inner region) code generation
91   ///
92   /// The callback takes code locations as arguments, each describing a
93   /// location at which code might need to be generated or a location that is
94   /// the target of control transfer.
95   ///
96   /// \param AllocaIP is the insertion point at which new alloca instructions
97   ///                 should be placed.
98   /// \param CodeGenIP is the insertion point at which the body code should be
99   ///                  placed.
100   /// \param ContinuationBB is the basic block target to leave the body.
101   ///
102   /// Note that all blocks pointed to by the arguments have terminators.
103   using BodyGenCallbackTy =
104       function_ref<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
105                         BasicBlock &ContinuationBB)>;
106 
107   // This is created primarily for the sections construct, as
108   // llvm::function_ref (BodyGenCallbackTy) is not storable (as described in
109   // the comments of the function_ref class, a function_ref contains a
110   // non-owning reference to the callable).
111   using StorableBodyGenCallbackTy =
112       std::function<void(InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
113                          BasicBlock &ContinuationBB)>;
114 
115   /// Callback type for loop body code generation.
116   ///
117   /// \param CodeGenIP is the insertion point where the loop's body code must be
118   ///                  placed. This will be a dedicated BasicBlock with a
119   ///                  conditional branch from the loop condition check and
120   ///                  terminated with an unconditional branch to the loop
121   ///                  latch.
122   /// \param IndVar    is the induction variable usable at the insertion point.
123   using LoopBodyGenCallbackTy =
124       function_ref<void(InsertPointTy CodeGenIP, Value *IndVar)>;
125 
126   /// Callback type for variable privatization (think copy & default
127   /// constructor).
128   ///
129   /// \param AllocaIP is the insertion point at which new alloca instructions
130   ///                 should be placed.
131   /// \param CodeGenIP is the insertion point at which the privatization code
132   ///                  should be placed.
133   /// \param Original The value being copied/created, should not be used in the
134   ///                 generated IR.
135   /// \param Inner The equivalent of \p Original that should be used in the
136   ///              generated IR; this is equal to \p Original if the value is
137   ///              a pointer and can thus be passed directly, otherwise it is
138   ///              an equivalent but different value.
139   /// \param ReplVal The replacement value, thus a copy or new created version
140   ///                of \p Inner.
141   ///
142   /// \returns The new insertion point where code generation continues and
143   ///          \p ReplVal the replacement value.
144   using PrivatizeCallbackTy = function_ref<InsertPointTy(
145       InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original,
146       Value &Inner, Value *&ReplVal)>;
147 
148   /// Description of a LLVM-IR insertion point (IP) and a debug/source location
149   /// (filename, line, column, ...).
150   struct LocationDescription {
151     template <typename T, typename U>
LocationDescriptionLocationDescription152     LocationDescription(const IRBuilder<T, U> &IRB)
153         : IP(IRB.saveIP()), DL(IRB.getCurrentDebugLocation()) {}
LocationDescriptionLocationDescription154     LocationDescription(const InsertPointTy &IP) : IP(IP) {}
LocationDescriptionLocationDescription155     LocationDescription(const InsertPointTy &IP, const DebugLoc &DL)
156         : IP(IP), DL(DL) {}
157     InsertPointTy IP;
158     DebugLoc DL;
159   };
160 
161   /// Emitter methods for OpenMP directives.
162   ///
163   ///{
164 
165   /// Generator for '#omp barrier'
166   ///
167   /// \param Loc The location where the barrier directive was encountered.
168   /// \param DK The kind of directive that caused the barrier.
169   /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
170   /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
171   ///                        should be checked and acted upon.
172   ///
173   /// \returns The insertion point after the barrier.
174   InsertPointTy createBarrier(const LocationDescription &Loc, omp::Directive DK,
175                               bool ForceSimpleCall = false,
176                               bool CheckCancelFlag = true);
177 
178   /// Generator for '#omp cancel'
179   ///
180   /// \param Loc The location where the directive was encountered.
181   /// \param IfCondition The evaluated 'if' clause expression, if any.
182   /// \param CanceledDirective The kind of directive that is canceled.
183   ///
184   /// \returns The insertion point after the barrier.
185   InsertPointTy createCancel(const LocationDescription &Loc, Value *IfCondition,
186                              omp::Directive CanceledDirective);
187 
188   /// Generator for '#omp parallel'
189   ///
190   /// \param Loc The insert and source location description.
191   /// \param AllocaIP The insertion points to be used for alloca instructions.
192   /// \param BodyGenCB Callback that will generate the region code.
193   /// \param PrivCB Callback to copy a given variable (think copy constructor).
194   /// \param FiniCB Callback to finalize variable copies.
195   /// \param IfCondition The evaluated 'if' clause expression, if any.
196   /// \param NumThreads The evaluated 'num_threads' clause expression, if any.
197   /// \param ProcBind The value of the 'proc_bind' clause (see ProcBindKind).
198   /// \param IsCancellable Flag to indicate a cancellable parallel region.
199   ///
200   /// \returns The insertion position *after* the parallel.
201   IRBuilder<>::InsertPoint
202   createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP,
203                  BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
204                  FinalizeCallbackTy FiniCB, Value *IfCondition,
205                  Value *NumThreads, omp::ProcBindKind ProcBind,
206                  bool IsCancellable);
207 
208   /// Generator for the control flow structure of an OpenMP canonical loop.
209   ///
210   /// This generator operates on the logical iteration space of the loop, i.e.
211   /// the caller only has to provide a loop trip count of the loop as defined by
212   /// base language semantics. The trip count is interpreted as an unsigned
213   /// integer. The induction variable passed to \p BodyGenCB will be of the same
214   /// type and run from 0 to \p TripCount - 1. It is up to the callback to
215   /// convert the logical iteration variable to the loop counter variable in the
216   /// loop body.
217   ///
218   /// \param Loc       The insert and source location description. The insert
219   ///                  location can be between two instructions or the end of a
220   ///                  degenerate block (e.g. a BB under construction).
221   /// \param BodyGenCB Callback that will generate the loop body code.
222   /// \param TripCount Number of iterations the loop body is executed.
223   /// \param Name      Base name used to derive BB and instruction names.
224   ///
225   /// \returns An object representing the created control flow structure which
226   ///          can be used for loop-associated directives.
227   CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
228                                          LoopBodyGenCallbackTy BodyGenCB,
229                                          Value *TripCount,
230                                          const Twine &Name = "loop");
231 
232   /// Generator for the control flow structure of an OpenMP canonical loop.
233   ///
234   /// Instead of a logical iteration space, this allows specifying user-defined
235   /// loop counter values using increment, upper- and lower bounds. To
236   /// disambiguate the terminology when counting downwards, instead of lower
237   /// bounds we use \p Start for the loop counter value in the first body
238   /// iteration.
239   ///
240   /// Consider the following limitations:
241   ///
242   ///  * A loop counter space over all integer values of its bit-width cannot be
243   ///    represented (e.g. using uint8_t, its loop trip count of 256 cannot be
244   ///    stored into an 8-bit integer):
245   ///
246   ///      DO I = 0, 255, 1
247   ///
248   ///  * Unsigned wrapping is only supported when wrapping only "once"; E.g.
249   ///    effectively counting downwards:
250   ///
251   ///      for (uint8_t i = 100u; i > 0; i += 127u)
252   ///
253   ///
254   /// TODO: May need to add additional parameters to represent:
255   ///
256   ///  * Allow representing downcounting with unsigned integers.
257   ///
258   ///  * Sign of the step and the comparison operator might disagree:
259   ///
260   ///      for (int i = 0; i < 42; --i)
261   ///
262   ///
263   /// \param Loc       The insert and source location description.
264   /// \param BodyGenCB Callback that will generate the loop body code.
265   /// \param Start     Value of the loop counter for the first iteration.
266   /// \param Stop      Loop counter values past this will stop the
267   ///                  iterations.
268   /// \param Step      Loop counter increment after each iteration; negative
269   ///                  means counting down.
270   /// \param IsSigned  Whether Start, Stop and Step are signed integers.
271   /// \param InclusiveStop Whether \p Stop itself is a valid value for the loop
272   ///                      counter.
273   /// \param ComputeIP Insertion point for instructions computing the trip
274   ///                  count. Can be used to ensure the trip count is available
275   ///                  at the outermost loop of a loop nest. If not set,
276   ///                  defaults to the preheader of the generated loop.
277   /// \param Name      Base name used to derive BB and instruction names.
278   ///
279   /// \returns An object representing the created control flow structure which
280   ///          can be used for loop-associated directives.
281   CanonicalLoopInfo *createCanonicalLoop(const LocationDescription &Loc,
282                                          LoopBodyGenCallbackTy BodyGenCB,
283                                          Value *Start, Value *Stop, Value *Step,
284                                          bool IsSigned, bool InclusiveStop,
285                                          InsertPointTy ComputeIP = {},
286                                          const Twine &Name = "loop");
287 
288   /// Collapse a loop nest into a single loop.
289   ///
290   /// Merges loops of a loop nest into a single CanonicalLoopInfo representation
291   /// that has the same number of innermost loop iterations as the original loop
292   /// nest. The induction variables of the input loops are derived from the
293   /// collapsed loop's induction variable. This is intended to be used to
294   /// implement OpenMP's collapse clause. Before applying a directive,
295   /// collapseLoops normalizes a loop nest to contain only a single loop and the
296   /// directive's implementation does not need to handle multiple loops itself.
297   /// This does not remove the need to handle all loop nest handling by
298   /// directives, such as the ordered(<n>) clause or the simd schedule-clause
299   /// modifier of the worksharing-loop directive.
300   ///
301   /// Example:
302   /// \code
303   ///   for (int i = 0; i < 7; ++i) // Canonical loop "i"
304   ///     for (int j = 0; j < 9; ++j) // Canonical loop "j"
305   ///       body(i, j);
306   /// \endcode
307   ///
308   /// After collapsing with Loops={i,j}, the loop is changed to
309   /// \code
310   ///   for (int ij = 0; ij < 63; ++ij) {
311   ///     int i = ij / 9;
312   ///     int j = ij % 9;
313   ///     body(i, j);
314   ///   }
315   /// \endcode
316   ///
317   /// In the current implementation, the following limitations apply:
318   ///
319   ///  * All input loops have an induction variable of the same type.
320   ///
321   ///  * The collapsed loop will have the same trip count integer type as the
322   ///    input loops. Therefore it is possible that the collapsed loop cannot
323   ///    represent all iterations of the input loops. For instance, assuming a
324   ///    32 bit integer type, and two input loops both iterating 2^16 times, the
325   ///    theoretical trip count of the collapsed loop would be 2^32 iterations,
326   ///    which cannot be represented in a 32-bit integer. Behavior is undefined
327   ///    in this case.
328   ///
329   ///  * The trip counts of every input loop must be available at \p ComputeIP.
330   ///    Non-rectangular loops are not yet supported.
331   ///
332   ///  * At each nest level, code between a surrounding loop and its nested loop
333   ///    is hoisted into the loop body, and such code will be executed more
334   ///    often than before collapsing (or not at all if any inner loop iteration
335   ///    has a trip count of 0). This is permitted by the OpenMP specification.
336   ///
337   /// \param DL        Debug location for instructions added for collapsing,
338   ///                  such as instructions to derive the input loop's
339   ///                  induction variables.
340   /// \param Loops     Loops in the loop nest to collapse. Loops are specified
341   ///                  from outermost-to-innermost and every control flow of a
342   ///                  loop's body must pass through its directly nested loop.
343   /// \param ComputeIP Where additional instructions that compute the collapsed
344   ///                  trip count are inserted. If not set, defaults to before
345   ///                  the generated loop.
346   ///
347   /// \returns The CanonicalLoopInfo object representing the collapsed loop.
348   CanonicalLoopInfo *collapseLoops(DebugLoc DL,
349                                    ArrayRef<CanonicalLoopInfo *> Loops,
350                                    InsertPointTy ComputeIP);
351 
352   /// Modifies the canonical loop to be a statically-scheduled workshare loop.
353   ///
354   /// This takes a \p CLI representing a canonical loop, such as the one
355   /// created by \p createCanonicalLoop and emits additional instructions to
356   /// turn it into a workshare loop. In particular, it calls to an OpenMP
357   /// runtime function in the preheader to obtain the loop bounds to be used in
358   /// the current thread, updates the relevant instructions in the canonical
359   /// loop and calls to an OpenMP runtime finalization function after the loop.
360   ///
361   /// \param Loc      The source location description, the insertion location
362   ///                 is not used.
363   /// \param CLI      A descriptor of the canonical loop to workshare.
364   /// \param AllocaIP An insertion point for Alloca instructions usable in the
365   ///                 preheader of the loop.
366   /// \param NeedsBarrier Indicates whether a barrier must be inserted after
367   ///                     the loop.
368   /// \param Chunk    The size of loop chunk considered as a unit when
369   ///                 scheduling. If \p nullptr, defaults to 1.
370   ///
371   /// \returns Updated CanonicalLoopInfo.
372   CanonicalLoopInfo *createStaticWorkshareLoop(const LocationDescription &Loc,
373                                                CanonicalLoopInfo *CLI,
374                                                InsertPointTy AllocaIP,
375                                                bool NeedsBarrier,
376                                                Value *Chunk = nullptr);
377 
378   /// Modifies the canonical loop to be a dynamically-scheduled workshare loop.
379   ///
380   /// This takes a \p CLI representing a canonical loop, such as the one
381   /// created by \p createCanonicalLoop and emits additional instructions to
382   /// turn it into a workshare loop. In particular, it calls to an OpenMP
383   /// runtime function in the preheader to obtain, and then in each iteration
384   /// to update the loop counter.
385   /// \param Loc      The source location description, the insertion location
386   ///                 is not used.
387   /// \param CLI      A descriptor of the canonical loop to workshare.
388   /// \param AllocaIP An insertion point for Alloca instructions usable in the
389   ///                 preheader of the loop.
390   /// \param SchedType Type of scheduling to be passed to the init function.
391   /// \param NeedsBarrier Indicates whether a barrier must be inserted after
392   ///                     the loop.
393   /// \param Chunk    The size of loop chunk considered as a unit when
394   ///                 scheduling. If \p nullptr, defaults to 1.
395   ///
396   /// \returns Point where to insert code after the loop.
397   InsertPointTy createDynamicWorkshareLoop(const LocationDescription &Loc,
398                                            CanonicalLoopInfo *CLI,
399                                            InsertPointTy AllocaIP,
400                                            omp::OMPScheduleType SchedType,
401                                            bool NeedsBarrier,
402                                            Value *Chunk = nullptr);
403 
404   /// Modifies the canonical loop to be a workshare loop.
405   ///
406   /// This takes a \p CLI representing a canonical loop, such as the one
407   /// created by \p createCanonicalLoop and emits additional instructions to
408   /// turn it into a workshare loop. In particular, it calls to an OpenMP
409   /// runtime function in the preheader to obtain the loop bounds to be used in
410   /// the current thread, updates the relevant instructions in the canonical
411   /// loop and calls to an OpenMP runtime finalization function after the loop.
412   ///
413   /// \param Loc      The source location description, the insertion location
414   ///                 is not used.
415   /// \param CLI      A descriptor of the canonical loop to workshare.
416   /// \param AllocaIP An insertion point for Alloca instructions usable in the
417   ///                 preheader of the loop.
418   /// \param NeedsBarrier Indicates whether a barrier must be inserted after
419   ///                     the loop.
420   ///
421   /// \returns Updated CanonicalLoopInfo.
422   CanonicalLoopInfo *createWorkshareLoop(const LocationDescription &Loc,
423                                          CanonicalLoopInfo *CLI,
424                                          InsertPointTy AllocaIP,
425                                          bool NeedsBarrier);
426 
427   /// Tile a loop nest.
428   ///
429   /// Tiles the loops of \p Loops by the tile sizes in \p TileSizes. Loops in
430   /// \p Loops must be perfectly nested, from outermost to innermost loop
431   /// (i.e. Loops.front() is the outermost loop). The trip count llvm::Value
432   /// of every loop and every tile size must be usable in the outermost
433   /// loop's preheader. This implies that the loop nest is rectangular.
434   ///
435   /// Example:
436   /// \code
437   ///   for (int i = 0; i < 15; ++i) // Canonical loop "i"
438   ///     for (int j = 0; j < 14; ++j) // Canonical loop "j"
439   ///         body(i, j);
440   /// \endcode
441   ///
442   /// After tiling with Loops={i,j} and TileSizes={5,7}, the loop is changed to
443   /// \code
444   ///   for (int i1 = 0; i1 < 3; ++i1)
445   ///     for (int j1 = 0; j1 < 2; ++j1)
446   ///       for (int i2 = 0; i2 < 5; ++i2)
447   ///         for (int j2 = 0; j2 < 7; ++j2)
448   ///           body(i1*5+i2, j1*7+j2);
449   /// \endcode
450   ///
451   /// The returned vector contains the loops {i1,j1,i2,j2}. The loops i1 and j1
452   /// are referred to as the floor loops, and the loops i2 and j2 as the tile
453   /// loops. Tiling also handles non-constant trip counts, non-constant tile
454   /// sizes and trip counts that are not multiples of the tile size. In the
455   /// latter case the tile loop of the last floor-loop iteration will have fewer
456   /// iterations than specified as its tile size.
457   ///
458   ///
459   /// \param DL        Debug location for instructions added by tiling, for
460   ///                  instance the floor- and tile trip count computation.
461   /// \param Loops     Loops to tile. The CanonicalLoopInfo objects are
462   ///                  invalidated by this method, i.e. should not be used
463   ///                  after tiling.
464   /// \param TileSizes For each loop in \p Loops, the tile size for that
465   ///                  dimension.
466   ///
467   /// \returns A list of generated loops. Contains twice as many loops as the
468   ///          input loop nest; the first half are the floor loops and the
469   ///          second half are the tile loops.
470   std::vector<CanonicalLoopInfo *>
471   tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
472             ArrayRef<Value *> TileSizes);
473 
474   /// Generator for '#omp flush'
475   ///
476   /// \param Loc The location where the flush directive was encountered
477   void createFlush(const LocationDescription &Loc);
478 
479   /// Generator for '#omp taskwait'
480   ///
481   /// \param Loc The location where the taskwait directive was encountered.
482   void createTaskwait(const LocationDescription &Loc);
483 
484   /// Generator for '#omp taskyield'
485   ///
486   /// \param Loc The location where the taskyield directive was encountered.
487   void createTaskyield(const LocationDescription &Loc);
488 
489   ///}
490 
491   /// Return the insertion point used by the underlying IRBuilder.
getInsertionPoint()492   InsertPointTy getInsertionPoint() { return Builder.saveIP(); }
493 
494   /// Update the internal location to \p Loc.
updateToLocation(const LocationDescription & Loc)495   bool updateToLocation(const LocationDescription &Loc) {
496     Builder.restoreIP(Loc.IP);
497     Builder.SetCurrentDebugLocation(Loc.DL);
498     return Loc.IP.getBlock() != nullptr;
499   }
500 
501   /// Return the function declaration for the runtime function with \p FnID.
502   FunctionCallee getOrCreateRuntimeFunction(Module &M,
503                                             omp::RuntimeFunction FnID);
504 
505   Function *getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID);
506 
507   /// Return the (LLVM-IR) string describing the source location \p LocStr.
508   Constant *getOrCreateSrcLocStr(StringRef LocStr);
509 
510   /// Return the (LLVM-IR) string describing the default source location.
511   Constant *getOrCreateDefaultSrcLocStr();
512 
513   /// Return the (LLVM-IR) string describing the source location identified by
514   /// the arguments.
515   Constant *getOrCreateSrcLocStr(StringRef FunctionName, StringRef FileName,
516                                  unsigned Line, unsigned Column);
517 
518   /// Return the (LLVM-IR) string describing the source location \p Loc.
519   Constant *getOrCreateSrcLocStr(const LocationDescription &Loc);
520 
521   /// Return an ident_t* encoding the source location \p SrcLocStr and \p Flags.
522   /// TODO: Create an enum class for the Reserve2Flags
523   Value *getOrCreateIdent(Constant *SrcLocStr,
524                           omp::IdentFlag Flags = omp::IdentFlag(0),
525                           unsigned Reserve2Flags = 0);
526 
527   // Get the type corresponding to __kmpc_impl_lanemask_t from the deviceRTL
528   Type *getLanemaskType();
529 
530   /// Generate control flow and cleanup for cancellation.
531   ///
532   /// \param CancelFlag Flag indicating if the cancellation is performed.
533   /// \param CanceledDirective The kind of directive that is canceled.
534   /// \param ExitCB Extra code to be generated in the exit block.
535   void emitCancelationCheckImpl(Value *CancelFlag,
536                                 omp::Directive CanceledDirective,
537                                 FinalizeCallbackTy ExitCB = {});
538 
539   /// Generate a barrier runtime call.
540   ///
541   /// \param Loc The location at which the request originated and is fulfilled.
542   /// \param DK The directive which caused the barrier
543   /// \param ForceSimpleCall Flag to force a simple (=non-cancellation) barrier.
544   /// \param CheckCancelFlag Flag to indicate a cancel barrier return value
545   ///                        should be checked and acted upon.
546   ///
547   /// \returns The insertion point after the barrier.
548   InsertPointTy emitBarrierImpl(const LocationDescription &Loc,
549                                 omp::Directive DK, bool ForceSimpleCall,
550                                 bool CheckCancelFlag);
551 
552   /// Generate a flush runtime call.
553   ///
554   /// \param Loc The location at which the request originated and is fulfilled.
555   void emitFlush(const LocationDescription &Loc);
556 
557   /// The finalization stack made up of finalize callbacks currently in-flight,
558   /// wrapped into FinalizationInfo objects that reference also the finalization
559   /// target block and the kind of cancellable directive.
560   SmallVector<FinalizationInfo, 8> FinalizationStack;
561 
562   /// Return true if the last entry in the finalization stack is of kind \p DK
563   /// and cancellable.
isLastFinalizationInfoCancellable(omp::Directive DK)564   bool isLastFinalizationInfoCancellable(omp::Directive DK) {
565     return !FinalizationStack.empty() &&
566            FinalizationStack.back().IsCancellable &&
567            FinalizationStack.back().DK == DK;
568   }
569 
570   /// Generate a taskwait runtime call.
571   ///
572   /// \param Loc The location at which the request originated and is fulfilled.
573   void emitTaskwaitImpl(const LocationDescription &Loc);
574 
575   /// Generate a taskyield runtime call.
576   ///
577   /// \param Loc The location at which the request originated and is fulfilled.
578   void emitTaskyieldImpl(const LocationDescription &Loc);
579 
580   /// Return the current thread ID.
581   ///
582   /// \param Ident The ident (ident_t*) describing the query origin.
583   Value *getOrCreateThreadID(Value *Ident);
584 
585   /// The underlying LLVM-IR module
586   Module &M;
587 
588   /// The LLVM-IR Builder used to create IR.
589   IRBuilder<> Builder;
590 
591   /// Map to remember source location strings
592   StringMap<Constant *> SrcLocStrMap;
593 
594   /// Map to remember existing ident_t*.
595   DenseMap<std::pair<Constant *, uint64_t>, Value *> IdentMap;
596 
597   /// Helper that contains information about regions we need to outline
598   /// during finalization.
599   struct OutlineInfo {
600     using PostOutlineCBTy = std::function<void(Function &)>;
601     PostOutlineCBTy PostOutlineCB;
602     BasicBlock *EntryBB, *ExitBB;
603 
604     /// Collect all blocks in between EntryBB and ExitBB in both the given
605     /// vector and set.
606     void collectBlocks(SmallPtrSetImpl<BasicBlock *> &BlockSet,
607                        SmallVectorImpl<BasicBlock *> &BlockVector);
608 
609     /// Return the function that contains the region to be outlined.
getFunctionOutlineInfo610     Function *getFunction() const { return EntryBB->getParent(); }
611   };
612 
613   /// Collection of regions that need to be outlined during finalization.
614   SmallVector<OutlineInfo, 16> OutlineInfos;
615 
616   /// Collection of owned canonical loop objects that eventually need to be
617   /// free'd.
618   std::forward_list<CanonicalLoopInfo> LoopInfos;
619 
620   /// Add a new region that will be outlined later.
addOutlineInfo(OutlineInfo && OI)621   void addOutlineInfo(OutlineInfo &&OI) { OutlineInfos.emplace_back(OI); }
622 
623   /// A map from unique names to the auto-generated variables they denote.
624   /// It stores variables with the following names: 1) ".gomp_critical_user_" +
625   /// <critical_section_name> + ".var" for "omp critical" directives; 2)
626   /// <mangled_name_for_global_var> + ".cache." for cache for threadprivate
627   /// variables.
628   StringMap<AssertingVH<Constant>, BumpPtrAllocator> InternalVars;
629 
630   /// Create the global variable holding the offload mappings information.
      ///
      /// \param Mappings The mapping values to be held by the global variable.
      /// \param VarName  NOTE(review): presumably the name given to the created
      ///                 global variable; confirm against the implementation.
      ///
      /// \returns The global variable holding the mappings information.
631   GlobalVariable *createOffloadMaptypes(SmallVectorImpl<uint64_t> &Mappings,
632                                         std::string VarName);
633 
634   /// Create the global variable holding the offload names information.
      ///
      /// \param Names   The name constants to be held by the global variable.
      /// \param VarName NOTE(review): presumably the name given to the created
      ///                global variable; confirm against the implementation.
      ///
      /// \returns The global variable holding the names information.
635   GlobalVariable *
636   createOffloadMapnames(SmallVectorImpl<llvm::Constant *> &Names,
637                         std::string VarName);
638 
      /// The alloca instructions used for a call to a mapper function. An object
      /// of this type is passed by reference to createMapperAllocas() and
      /// emitMapperCall(). All members default to nullptr.
639   struct MapperAllocas {
640     AllocaInst *ArgsBase = nullptr;
641     AllocaInst *Args = nullptr;
642     AllocaInst *ArgSizes = nullptr;
643   };
644 
645   /// Create the alloca instructions used in the call to mapper functions.
      ///
      /// \param Loc           The source location description.
      /// \param AllocaIP      The insertion point to be used for the allocas.
      /// \param NumOperands   Number of operands in the call.
      /// \param MapperAllocas NOTE(review): presumably receives the created
      ///                      allocas; confirm against the implementation.
646   void createMapperAllocas(const LocationDescription &Loc,
647                            InsertPointTy AllocaIP, unsigned NumOperands,
648                            struct MapperAllocas &MapperAllocas);
649 
650   /// Create the call for the target mapper function.
651   /// \param Loc The source location description.
652   /// \param MapperFunc Function to be called.
653   /// \param SrcLocInfo Source location information global.
654   /// \param MaptypesArg The map types argument of the call.
655   /// \param MapnamesArg The map names argument of the call.
656   /// \param MapperAllocas The AllocaInst used for the call.
657   /// \param DeviceID Device ID for the call.
658   /// \param NumOperands Number of operands in the call.
659   void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc,
660                       Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg,
661                       struct MapperAllocas &MapperAllocas, int64_t DeviceID,
662                       unsigned NumOperands);
663 
664 public:
665   /// Generator for __kmpc_copyprivate
666   ///
667   /// \param Loc The source location description.
668   /// \param BufSize Number of elements in the buffer.
669   /// \param CpyBuf List of pointers to data to be copied.
670   /// \param CpyFn Function to call for copying data.
671   /// \param DidIt Flag variable; 1 for 'single' thread, 0 otherwise.
672   ///
673   /// \returns The insertion position *after* the CopyPrivate call.
674 
675   InsertPointTy createCopyPrivate(const LocationDescription &Loc,
676                                   llvm::Value *BufSize, llvm::Value *CpyBuf,
677                                   llvm::Value *CpyFn, llvm::Value *DidIt);
678 
679   /// Generator for '#omp single'
680   ///
681   /// \param Loc The source location description.
682   /// \param BodyGenCB Callback that will generate the region code.
683   /// \param FiniCB Callback to finalize variable copies.
684   /// \param DidIt Local variable used as a flag to indicate 'single' thread.
685   ///
686   /// \returns The insertion position *after* the single region.
687   InsertPointTy createSingle(const LocationDescription &Loc,
688                              BodyGenCallbackTy BodyGenCB,
689                              FinalizeCallbackTy FiniCB, llvm::Value *DidIt);
690 
691   /// Generator for '#omp master'
692   ///
693   /// \param Loc The insert and source location description.
694   /// \param BodyGenCB Callback that will generate the region code.
695   /// \param FiniCB Callback to finalize variable copies.
696   ///
697   /// \returns The insertion position *after* the master region.
698   InsertPointTy createMaster(const LocationDescription &Loc,
699                              BodyGenCallbackTy BodyGenCB,
700                              FinalizeCallbackTy FiniCB);
701 
702   /// Generator for '#omp masked'
703   ///
704   /// \param Loc The insert and source location description.
705   /// \param BodyGenCB Callback that will generate the region code.
706   /// \param FiniCB Callback to finalize variable copies.
      /// \param Filter The filter value of the masked region.
      ///        NOTE(review): presumably the thread id selected by the 'filter'
      ///        clause; confirm against the implementation.
707   ///
708   /// \returns The insertion position *after* the masked region.
709   InsertPointTy createMasked(const LocationDescription &Loc,
710                              BodyGenCallbackTy BodyGenCB,
711                              FinalizeCallbackTy FiniCB, Value *Filter);
712 
713   /// Generator for '#omp critical'
714   ///
715   /// \param Loc The insert and source location description.
716   /// \param BodyGenCB Callback that will generate the region body code.
717   /// \param FiniCB Callback to finalize variable copies.
718   /// \param CriticalName Name of the lock used by the critical directive.
719   /// \param HintInst Hint instruction for the hint clause associated with
      ///        the critical directive.
720   ///
721   /// \returns The insertion position *after* the critical region.
722   InsertPointTy createCritical(const LocationDescription &Loc,
723                                BodyGenCallbackTy BodyGenCB,
724                                FinalizeCallbackTy FiniCB,
725                                StringRef CriticalName, Value *HintInst);
726 
727   /// Generator for '#omp sections'
728   ///
729   /// \param Loc The insert and source location description.
730   /// \param AllocaIP The insertion points to be used for alloca instructions.
731   /// \param SectionCBs Callbacks that will generate body of each section.
732   /// \param PrivCB Callback to copy a given variable (think copy constructor).
733   /// \param FiniCB Callback to finalize variable copies.
734   /// \param IsCancellable Flag to indicate a cancellable parallel region.
735   /// \param IsNowait If true, the barrier that ensures all sections are
736   ///        executed before moving forward will not be generated.
      ///
737   /// \returns The insertion position *after* the sections.
738   InsertPointTy createSections(const LocationDescription &Loc,
739                                InsertPointTy AllocaIP,
740                                ArrayRef<StorableBodyGenCallbackTy> SectionCBs,
741                                PrivatizeCallbackTy PrivCB,
742                                FinalizeCallbackTy FiniCB, bool IsCancellable,
743                                bool IsNowait);
744 
745   /// Generator for '#omp section'
746   ///
747   /// \param Loc The insert and source location description.
748   /// \param BodyGenCB Callback that will generate the region body code.
749   /// \param FiniCB Callback to finalize variable copies.
      ///
750   /// \returns The insertion position *after* the section.
751   InsertPointTy createSection(const LocationDescription &Loc,
752                               BodyGenCallbackTy BodyGenCB,
753                               FinalizeCallbackTy FiniCB);
754 
755   /// Generate conditional branch and relevant BasicBlocks through which private
756   /// threads copy the 'copyin' variables from Master copy to threadprivate
757   /// copies.
758   ///
759   /// \param IP Insertion block for the copyin conditional.
760   /// \param MasterAddr A pointer to the master variable.
761   /// \param PrivateAddr A pointer to the threadprivate variable.
762   /// \param IntPtrTy Pointer size type.
763   /// \param BranchtoEnd Create a branch between the copyin.not.master blocks
764   ///        and copy.in.end block. Defaults to true.
765   ///
766   /// \returns The insertion point where the copying operation is to be emitted.
767   InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr,
768                                          Value *PrivateAddr,
769                                          llvm::IntegerType *IntPtrTy,
770                                          bool BranchtoEnd = true);
771 
772   /// Create a runtime call for kmpc_Alloc
773   ///
774   /// \param Loc The insert and source location description.
775   /// \param Size Size of allocated memory space.
776   /// \param Allocator Allocator information instruction.
777   /// \param Name Name of call Instruction for OMP_alloc. Defaults to the empty
      ///        string.
778   ///
779   /// \returns CallInst to the OMP_Alloc call.
780   CallInst *createOMPAlloc(const LocationDescription &Loc, Value *Size,
781                            Value *Allocator, std::string Name = "");
782 
783   /// Create a runtime call for kmpc_free
784   ///
785   /// \param Loc The insert and source location description.
786   /// \param Addr Address of memory space to be freed.
787   /// \param Allocator Allocator information instruction.
788   /// \param Name Name of call Instruction for OMP_Free. Defaults to the empty
      ///        string.
789   ///
790   /// \returns CallInst to the OMP_Free call.
791   CallInst *createOMPFree(const LocationDescription &Loc, Value *Addr,
792                           Value *Allocator, std::string Name = "");
793 
794   /// Create a runtime call for kmpc_threadprivate_cached
795   ///
796   /// \param Loc The insert and source location description.
797   /// \param Pointer Pointer to data to be cached.
798   /// \param Size Size of data to be cached.
799   /// \param Name Name of the call instruction. Defaults to the empty string.
800   ///
801   /// \returns CallInst to the thread private cache call.
802   CallInst *createCachedThreadPrivate(const LocationDescription &Loc,
803                                       llvm::Value *Pointer,
804                                       llvm::ConstantInt *Size,
805                                       const llvm::Twine &Name = Twine(""));
806 
807   /// The `omp target` interface
808   ///
809   /// For more information about the usage of this interface,
810   /// \see openmp/libomptarget/deviceRTLs/common/include/target.h
811   ///
812   ///{
813 
814   /// Create a runtime call for kmpc_target_init
815   ///
816   /// \param Loc The insert and source location description.
817   /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
818   /// \param RequiresFullRuntime Indicate if a full device runtime is necessary.
      ///
      /// \returns An insertion point. NOTE(review): presumably the position at
      ///          which the kernel body is to be emitted after the init call;
      ///          confirm against the implementation.
819   InsertPointTy createTargetInit(const LocationDescription &Loc, bool IsSPMD, bool RequiresFullRuntime);
820 
821   /// Create a runtime call for kmpc_target_deinit
822   ///
823   /// \param Loc The insert and source location description.
824   /// \param IsSPMD Flag to indicate if the kernel is an SPMD kernel or not.
825   /// \param RequiresFullRuntime Indicate if a full device runtime is necessary.
      ///        NOTE(review): presumably both flags have to match the values
      ///        passed to createTargetInit for the same kernel; confirm.
826   void createTargetDeinit(const LocationDescription &Loc, bool IsSPMD, bool RequiresFullRuntime);
827 
828   ///}
829 
830   /// Declarations for LLVM-IR types (simple, array, function and structure) are
831   /// generated below. Their names are defined and used in OMPKinds.def. Here
832   /// we provide the declarations, the initializeTypes function will provide the
833   /// values.
      ///
      /// Each macro below expands one OMPKinds.def entry into null-initialized
      /// member declarations: the type itself plus, for array, function and
      /// structure types, a companion PointerType member.
834   ///
835   ///{
836 #define OMP_TYPE(VarName, InitValue) Type *VarName = nullptr;
837 #define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize)                             \
838   ArrayType *VarName##Ty = nullptr;                                            \
839   PointerType *VarName##PtrTy = nullptr;
840 #define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...)                  \
841   FunctionType *VarName = nullptr;                                             \
842   PointerType *VarName##Ptr = nullptr;
843 #define OMP_STRUCT_TYPE(VarName, StrName, ...)                                 \
844   StructType *VarName = nullptr;                                               \
845   PointerType *VarName##Ptr = nullptr;
846 #include "llvm/Frontend/OpenMP/OMPKinds.def"
847 
848   ///}
849 
850 private:
851   /// Create all simple and struct types exposed by the runtime and remember
852   /// the llvm::PointerTypes of them for easy access later.
      ///
      /// \param M NOTE(review): presumably the module whose context the types
      ///          are created in; confirm against the implementation.
853   void initializeTypes(Module &M);
854 
855   /// Common interface for generating entry calls for OMP Directives.
856   /// If the directive has a region/body, it will set the insertion
857   /// point to the body.
858   ///
859   /// \param OMPD Directive to generate entry blocks for.
860   /// \param EntryCall Call to the entry OMP Runtime Function.
861   /// \param ExitBB Block where the region ends.
862   /// \param Conditional Indicate if the entry call result will be used
863   ///        to evaluate a conditional of whether a thread will execute
864   ///        body code or not. Defaults to false.
865   ///
866   /// \returns The insertion position in the exit block.
867   InsertPointTy emitCommonDirectiveEntry(omp::Directive OMPD, Value *EntryCall,
868                                          BasicBlock *ExitBB,
869                                          bool Conditional = false);
870 
871   /// Common interface to finalize the region
872   ///
873   /// \param OMPD Directive to generate exiting code for.
874   /// \param FinIP Insertion point for emitting finalization code and exit call.
875   /// \param ExitCall Call to the ending OMP Runtime Function.
876   /// \param HasFinalize Indicate if the directive will require finalization
877   ///        and has a finalization callback in the stack that
878   ///        should be called. Defaults to true.
879   ///
880   /// \returns The insertion position in the exit block.
881   InsertPointTy emitCommonDirectiveExit(omp::Directive OMPD,
882                                         InsertPointTy FinIP,
883                                         Instruction *ExitCall,
884                                         bool HasFinalize = true);
885 
886   /// Common interface to generate OMP inlined regions
887   ///
888   /// \param OMPD Directive to generate inlined region for.
889   /// \param EntryCall Call to the entry OMP Runtime Function.
890   /// \param ExitCall Call to the ending OMP Runtime Function.
891   /// \param BodyGenCB Body code generation callback.
892   /// \param FiniCB Finalization callback. Will be called when finalizing region.
893   /// \param Conditional Indicate if the entry call result will be used
894   ///        to evaluate a conditional of whether a thread will execute
895   ///        body code or not. Defaults to false.
896   /// \param HasFinalize Indicate if the directive will require finalization
897   ///        and has a finalization callback in the stack that
898   ///        should be called. Defaults to true.
899   /// \param IsCancellable If HasFinalize is set to true, indicate if
900   ///        the directive should be cancellable. Defaults to false.
901   /// \returns The insertion point after the region.
902 
903   InsertPointTy
904   EmitOMPInlinedRegion(omp::Directive OMPD, Instruction *EntryCall,
905                        Instruction *ExitCall, BodyGenCallbackTy BodyGenCB,
906                        FinalizeCallbackTy FiniCB, bool Conditional = false,
907                        bool HasFinalize = true, bool IsCancellable = false);
908 
909   /// Get a name built from \p Parts and the platform-specific separators.
910   /// \param Parts Different parts of the final name that need separation.
911   /// \param FirstSeparator First separator used between the initial two
912   ///        parts of the name.
913   /// \param Separator Separator used between all of the remaining consecutive
914   ///        parts of the name.
      /// \returns The assembled name. NOTE(review): the exact layout is not
      ///          visible in this declaration; confirm against the implementation.
915   static std::string getNameWithSeparators(ArrayRef<StringRef> Parts,
916                                            StringRef FirstSeparator,
917                                            StringRef Separator);
918 
919   /// Gets (if a variable with the given name already exists) or creates an
920   /// internal global variable with the specified Name. The created variable has
921   /// linkage CommonLinkage by default and is initialized by null value.
922   /// \param Ty Type of the global variable. If it exists already the type
923   /// must be the same.
924   /// \param Name Name of the variable.
      /// \param AddressSpace NOTE(review): presumably the address space the
      ///        variable is created in; defaults to 0. Confirm against the
      ///        implementation.
925   Constant *getOrCreateOMPInternalVariable(Type *Ty, const Twine &Name,
926                                            unsigned AddressSpace = 0);
927 
928   /// Returns the corresponding lock object for the specified critical region
929   /// name. If the lock object does not exist it is created, otherwise the
930   /// reference to the existing copy is returned.
931   /// \param CriticalName Name of the critical region.
932   /// \returns The lock object for \p CriticalName.
933   Value *getOMPCriticalRegionLock(StringRef CriticalName);
934 
935   /// Callback type for Atomic Expression update
936   /// Example:
937   /// \code{.cpp}
938   /// unsigned x = 0;
939   /// #pragma omp atomic update
940   /// x = Expr(x_old);  //Expr() is any legal operation
941   /// \endcode
942   ///
943   /// \param XOld The value of the atomic memory address to use for update.
944   /// \param IRB Reference to the IRBuilder to use.
945   ///
946   /// \returns Value to update X to.
947   using AtomicUpdateCallbackTy =
948       const function_ref<Value *(Value *XOld, IRBuilder<> &IRB)>;
949 
950 private:
      /// The kinds of OpenMP atomic operation; passed as the AK argument of
      /// checkAndEmitFlushAfterAtomic().
951   enum AtomicKind { Read, Write, Update, Capture };
952 
953   /// Determine whether to emit flush or not
954   ///
955   /// \param Loc    The insert and source location description.
956   /// \param AO     The required atomic ordering.
957   /// \param AK     The OpenMP atomic operation kind used.
958   ///
959   /// \returns Whether a flush was emitted or not.
960   bool checkAndEmitFlushAfterAtomic(const LocationDescription &Loc,
961                                     AtomicOrdering AO, AtomicKind AK);
962 
963   /// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X
964   /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
965   /// Only Scalar data types.
966   ///
967   /// \param AllocIP  Instruction to create AllocaInst before.
968   /// \param X        The target atomic pointer to be updated.
969   /// \param Expr     The value to update X with.
970   /// \param AO       Atomic ordering of the generated atomic
971   ///                 instructions.
972   /// \param RMWOp    The binary operation used for update. If the
973   ///                 operation is not supported by atomicRMW,
974   ///                 or belongs to {FADD, FSUB, BAD_BINOP},
975   ///                 then a `cmpExch` based atomic will be generated.
976   /// \param UpdateOp Code generator for complex expressions that cannot be
977   ///                 expressed through atomicrmw instruction.
978   /// \param VolatileX       true if \a X is volatile.
979   /// \param IsXLHSInRHSPart true if \a X is Left H.S. in Right H.S. part of
980   ///                        the update expression, false otherwise.
981   ///                        (e.g. true for X = X BinOp Expr)
982   ///
983   /// \returns A pair of the old value of X before the update, and the value
984   ///          used for the update.
985   std::pair<Value *, Value *> emitAtomicUpdate(Instruction *AllocIP, Value *X,
986                                                Value *Expr, AtomicOrdering AO,
987                                                AtomicRMWInst::BinOp RMWOp,
988                                                AtomicUpdateCallbackTy &UpdateOp,
989                                                bool VolatileX,
990                                                bool IsXLHSInRHSPart);
991 
992   /// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2 .
993   ///
994   /// \returns The instruction.
995   Value *emitRMWOpAsInstruction(Value *Src1, Value *Src2,
996                                 AtomicRMWInst::BinOp RMWOp);
997 
998 public:
999   /// A struct to pack relevant information while generating atomic ops.
       /// All members have in-class defaults (nullptr / false).
       /// NOTE(review): Var is presumably the pointer to the atomic variable
       /// (compare the X and V parameters of createAtomicRead), with IsSigned and
       /// IsVolatile describing it; confirm against the implementation.
1000   struct AtomicOpValue {
1001     Value *Var = nullptr;
1002     bool IsSigned = false;
1003     bool IsVolatile = false;
1004   };
1005 
1006   /// Emit atomic Read for : V = X --- Only Scalar data types.
1007   ///
1008   /// \param Loc  The insert and source location description.
1009   /// \param X    The target pointer to be atomically read.
1010   /// \param V    Memory address where to store the atomically read
1011   ///             value.
1012   /// \param AO   Atomic ordering of the generated atomic
1013   ///             instructions.
1014   ///
1015   /// \returns Insertion point after generated atomic read IR.
1016   InsertPointTy createAtomicRead(const LocationDescription &Loc,
1017                                  AtomicOpValue &X, AtomicOpValue &V,
1018                                  AtomicOrdering AO);
1019 
1020   /// Emit atomic write for : X = Expr --- Only Scalar data types.
1021   ///
1022   /// \param Loc  The insert and source location description.
1023   /// \param X    The target pointer to be atomically written to.
1024   /// \param Expr The value to store.
1025   /// \param AO   Atomic ordering of the generated atomic
1026   ///             instructions.
1027   ///
1028   /// \returns Insertion point after generated atomic Write IR.
1029   InsertPointTy createAtomicWrite(const LocationDescription &Loc,
1030                                   AtomicOpValue &X, Value *Expr,
1031                                   AtomicOrdering AO);
1032 
1033   /// Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X
1034   /// For complex Operations: X = UpdateOp(X) => CmpExch X, old_X, UpdateOp(X)
1035   /// Only Scalar data types.
1036   ///
1037   /// \param Loc      The insert and source location description.
1038   /// \param AllocIP  Instruction to create AllocaInst before.
1039   /// \param X        The target atomic pointer to be updated.
1040   /// \param Expr     The value to update X with.
1041   /// \param AO       Atomic ordering of the generated atomic instructions.
1042   /// \param RMWOp    The binary operation used for update. If the operation
1043   ///                 is not supported by atomicRMW, or belongs to
1044   ///                 {FADD, FSUB, BAD_BINOP}, then a `cmpExch` based
1045   ///                 atomic will be generated.
1046   /// \param UpdateOp Code generator for complex expressions that cannot be
1047   ///                 expressed through atomicrmw instruction.
1048   /// \param IsXLHSInRHSPart true if \a X is Left H.S. in Right H.S. part of
1049   ///                        the update expression, false otherwise.
1050   ///                        (e.g. true for X = X BinOp Expr)
1051   ///
1052   /// \returns Insertion point after generated atomic update IR.
1053   InsertPointTy createAtomicUpdate(const LocationDescription &Loc,
1054                                    Instruction *AllocIP, AtomicOpValue &X,
1055                                    Value *Expr, AtomicOrdering AO,
1056                                    AtomicRMWInst::BinOp RMWOp,
1057                                    AtomicUpdateCallbackTy &UpdateOp,
1058                                    bool IsXLHSInRHSPart);
1059 
1060   /// Emit atomic update for constructs: --- Only Scalar data types
1061   /// V = X; X = X BinOp Expr ,
1062   /// X = X BinOp Expr; V = X,
1063   /// V = X; X = Expr BinOp X,
1064   /// X = Expr BinOp X; V = X,
1065   /// V = X; X = UpdateOp(X),
1066   /// X = UpdateOp(X); V = X,
1067   ///
1068   /// \param Loc        The insert and source location description.
1069   /// \param AllocIP    Instruction to create AllocaInst before.
1070   /// \param X          The target atomic pointer to be updated.
1071   /// \param V          Memory address where to store captured value.
1072   /// \param Expr       The value to update X with.
1073   /// \param AO         Atomic ordering of the generated atomic instructions.
1074   /// \param RMWOp      The binary operation used for update. If the
1075   ///                   operation is not supported by atomicRMW, or belongs to
1076   ///                   {FADD, FSUB, BAD_BINOP}, then a cmpExch based
1077   ///                   atomic will be generated.
1078   /// \param UpdateOp   Code generator for complex expressions that cannot be
1079   ///                   expressed through atomicrmw instruction.
1080   /// \param UpdateExpr true if X is an in place update of the form
1081   ///                   X = X BinOp Expr or X = Expr BinOp X
1082   /// \param IsPostfixUpdate true if original value of 'x' must be stored in
1083   ///                        'v', not an updated one.
1084   /// \param IsXLHSInRHSPart true if X is Left H.S. in Right H.S. part of the
1085   ///                        update expression, false otherwise.
1086   ///                        (e.g. true for X = X BinOp Expr)
1087   ///
1088   /// \returns Insertion point after generated atomic capture IR.
1089   InsertPointTy
1090   createAtomicCapture(const LocationDescription &Loc, Instruction *AllocIP,
1091                       AtomicOpValue &X, AtomicOpValue &V, Value *Expr,
1092                       AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
1093                       AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr,
1094                       bool IsPostfixUpdate, bool IsXLHSInRHSPart);
1095 
1096   /// Create the control flow structure of a canonical OpenMP loop.
1097   ///
1098   /// The emitted loop will be disconnected, i.e. no edge to the loop's
1099   /// preheader and no terminator in the AfterBB. The OpenMPIRBuilder's
1100   /// IRBuilder location is not preserved.
1101   ///
1102   /// \param DL        DebugLoc used for the instructions in the skeleton.
1103   /// \param TripCount Value to be used for the trip count.
1104   /// \param F         Function in which to insert the BasicBlocks.
1105   /// \param PreInsertBefore  Where to insert BBs that execute before the body,
1106   ///                         typically the body itself.
1107   /// \param PostInsertBefore Where to insert BBs that execute after the body.
1108   /// \param Name      Base name used to derive BB
1109   ///                  and instruction names. Defaults to an empty Twine.
1110   ///
1111   /// \returns The CanonicalLoopInfo that represents the emitted loop.
1112   CanonicalLoopInfo *createLoopSkeleton(DebugLoc DL, Value *TripCount,
1113                                         Function *F,
1114                                         BasicBlock *PreInsertBefore,
1115                                         BasicBlock *PostInsertBefore,
1116                                         const Twine &Name = {});
1117 };
1118 
1119 /// Class to represented the control flow structure of an OpenMP canonical loop.
1120 ///
1121 /// The control-flow structure is standardized for easy consumption by
1122 /// directives associated with loops. For instance, the worksharing-loop
1123 /// construct may change this control flow such that each loop iteration is
1124 /// executed on only one thread.
1125 ///
1126 /// The control flow can be described as follows:
1127 ///
1128 ///     Preheader
1129 ///        |
1130 ///  /-> Header
1131 ///  |     |
1132 ///  |    Cond---\
1133 ///  |     |     |
1134 ///  |    Body   |
1135 ///  |    | |    |
1136 ///  |   <...>   |
1137 ///  |    | |    |
1138 ///   \--Latch   |
1139 ///              |
1140 ///             Exit
1141 ///              |
1142 ///            After
1143 ///
1144 /// Code in the header, condition block, latch and exit block must not have any
1145 /// side-effect. The body block is the single entry point into the loop body,
1146 /// which may contain arbitrary control flow as long as all control paths
1147 /// eventually branch to the latch block.
1148 ///
1149 /// Defined outside OpenMPIRBuilder because one cannot forward-declare nested
1150 /// classes.
1151 class CanonicalLoopInfo {
1152   friend class OpenMPIRBuilder;
1153 
1154 private:
1155   /// Whether this object currently represents a loop.
1156   bool IsValid = false;
1157 
1158   BasicBlock *Preheader;
1159   BasicBlock *Header;
1160   BasicBlock *Cond;
1161   BasicBlock *Body;
1162   BasicBlock *Latch;
1163   BasicBlock *Exit;
1164   BasicBlock *After;
1165 
1166   /// Add the control blocks of this loop to \p BBs.
1167   ///
1168   /// This does not include any block from the body, including the one returned
1169   /// by getBody().
1170   void collectControlBlocks(SmallVectorImpl<BasicBlock *> &BBs);
1171 
1172 public:
1173   /// The preheader ensures that there is only a single edge entering the loop.
1174   /// Code that must be execute before any loop iteration can be emitted here,
1175   /// such as computing the loop trip count and begin lifetime markers. Code in
1176   /// the preheader is not considered part of the canonical loop.
getPreheader()1177   BasicBlock *getPreheader() const { return Preheader; }
1178 
1179   /// The header is the entry for each iteration. In the canonical control flow,
1180   /// it only contains the PHINode for the induction variable.
getHeader()1181   BasicBlock *getHeader() const { return Header; }
1182 
1183   /// The condition block computes whether there is another loop iteration. If
1184   /// yes, branches to the body; otherwise to the exit block.
getCond()1185   BasicBlock *getCond() const { return Cond; }
1186 
1187   /// The body block is the single entry for a loop iteration and not controlled
1188   /// by CanonicalLoopInfo. It can contain arbitrary control flow but must
1189   /// eventually branch to the \p Latch block.
getBody()1190   BasicBlock *getBody() const { return Body; }
1191 
1192   /// Reaching the latch indicates the end of the loop body code. In the
1193   /// canonical control flow, it only contains the increment of the induction
1194   /// variable.
getLatch()1195   BasicBlock *getLatch() const { return Latch; }
1196 
1197   /// Reaching the exit indicates no more iterations are being executed.
getExit()1198   BasicBlock *getExit() const { return Exit; }
1199 
1200   /// The after block is intended for clean-up code such as lifetime end
1201   /// markers. It is separate from the exit block to ensure, analogous to the
1202   /// preheader, it having just a single entry edge and being free from PHI
1203   /// nodes should there be multiple loop exits (such as from break
1204   /// statements/cancellations).
getAfter()1205   BasicBlock *getAfter() const { return After; }
1206 
1207   /// Returns the llvm::Value containing the number of loop iterations. It must
1208   /// be valid in the preheader and always interpreted as an unsigned integer of
1209   /// any bit-width.
getTripCount()1210   Value *getTripCount() const {
1211     Instruction *CmpI = &Cond->front();
1212     assert(isa<CmpInst>(CmpI) && "First inst must compare IV with TripCount");
1213     return CmpI->getOperand(1);
1214   }
1215 
1216   /// Returns the instruction representing the current logical induction
1217   /// variable. Always unsigned, always starting at 0 with an increment of one.
getIndVar()1218   Instruction *getIndVar() const {
1219     Instruction *IndVarPHI = &Header->front();
1220     assert(isa<PHINode>(IndVarPHI) && "First inst must be the IV PHI");
1221     return IndVarPHI;
1222   }
1223 
1224   /// Return the type of the induction variable (and the trip count).
getIndVarType()1225   Type *getIndVarType() const { return getIndVar()->getType(); }
1226 
1227   /// Return the insertion point for user code before the loop.
getPreheaderIP()1228   OpenMPIRBuilder::InsertPointTy getPreheaderIP() const {
1229     return {Preheader, std::prev(Preheader->end())};
1230   };
1231 
1232   /// Return the insertion point for user code in the body.
getBodyIP()1233   OpenMPIRBuilder::InsertPointTy getBodyIP() const {
1234     return {Body, Body->begin()};
1235   };
1236 
1237   /// Return the insertion point for user code after the loop.
getAfterIP()1238   OpenMPIRBuilder::InsertPointTy getAfterIP() const {
1239     return {After, After->begin()};
1240   };
1241 
getFunction()1242   Function *getFunction() const { return Header->getParent(); }
1243 
1244   /// Consistency self-check.
1245   void assertOK() const;
1246 };
1247 
1248 } // end namespace llvm
1249 
1250 #endif // LLVM_FRONTEND_OPENMP_OMPIRBUILDER_H
1251