1 //===-- runtime/reduction-templates.h -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 // Generic function templates used by various reduction transformation
10 // intrinsic functions (SUM, PRODUCT, &c.)
11 //
12 // * Partial reductions (i.e., those with DIM= arguments that are not
13 //   required to be 1 by the rank of the argument) return arrays that
14 //   are dynamically allocated in a caller-supplied descriptor.
15 // * Total reductions (i.e., no DIM= argument) with FINDLOC, MAXLOC, & MINLOC
16 //   return integer vectors of some kind, not scalars; a caller-supplied
17 //   descriptor is used
18 // * Character-valued reductions (MAXVAL & MINVAL) return arbitrary
19 //   length results, dynamically allocated in a caller-supplied descriptor
20 
21 #ifndef FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_
22 #define FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_
23 
24 #include "terminator.h"
25 #include "tools.h"
26 #include "flang/Runtime/cpp-type.h"
27 #include "flang/Runtime/descriptor.h"
28 
29 namespace Fortran::runtime {
30 
31 // Reductions are implemented with *accumulators*, which are instances of
32 // classes that incrementally build up the result (or an element thereof) during
33 // a traversal of the unmasked elements of an array.  Each accumulator class
34 // supports a constructor (which captures a reference to the array), an
35 // AccumulateAt() member function that applies supplied subscripts to the
36 // array and does something with a scalar element, and a GetResult()
37 // member function that copies a final result into its destination.
38 
39 // Total reduction of the array argument to a scalar (or to a vector in the
40 // cases of FINDLOC, MAXLOC, & MINLOC).  These are the cases without DIM= or
41 // cases where the argument has rank 1 and DIM=, if present, must be 1.
42 template <typename TYPE, typename ACCUMULATOR>
DoTotalReduction(const Descriptor & x,int dim,const Descriptor * mask,ACCUMULATOR & accumulator,const char * intrinsic,Terminator & terminator)43 inline void DoTotalReduction(const Descriptor &x, int dim,
44     const Descriptor *mask, ACCUMULATOR &accumulator, const char *intrinsic,
45     Terminator &terminator) {
46   if (dim < 0 || dim > 1) {
47     terminator.Crash("%s: bad DIM=%d for ARRAY argument with rank %d",
48         intrinsic, dim, x.rank());
49   }
50   SubscriptValue xAt[maxRank];
51   x.GetLowerBounds(xAt);
52   if (mask) {
53     CheckConformability(x, *mask, terminator, intrinsic, "ARRAY", "MASK");
54     SubscriptValue maskAt[maxRank];
55     mask->GetLowerBounds(maskAt);
56     if (mask->rank() > 0) {
57       for (auto elements{x.Elements()}; elements--;
58            x.IncrementSubscripts(xAt), mask->IncrementSubscripts(maskAt)) {
59         if (IsLogicalElementTrue(*mask, maskAt)) {
60           accumulator.template AccumulateAt<TYPE>(xAt);
61         }
62       }
63       return;
64     } else if (!IsLogicalElementTrue(*mask, maskAt)) {
65       // scalar MASK=.FALSE.: return identity value
66       return;
67     }
68   }
69   // No MASK=, or scalar MASK=.TRUE.
70   for (auto elements{x.Elements()}; elements--; x.IncrementSubscripts(xAt)) {
71     if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
72       break; // cut short, result is known
73     }
74   }
75 }
76 
77 template <TypeCategory CAT, int KIND, typename ACCUMULATOR>
GetTotalReduction(const Descriptor & x,const char * source,int line,int dim,const Descriptor * mask,ACCUMULATOR && accumulator,const char * intrinsic)78 inline CppTypeFor<CAT, KIND> GetTotalReduction(const Descriptor &x,
79     const char *source, int line, int dim, const Descriptor *mask,
80     ACCUMULATOR &&accumulator, const char *intrinsic) {
81   Terminator terminator{source, line};
82   RUNTIME_CHECK(terminator, TypeCode(CAT, KIND) == x.type());
83   using CppType = CppTypeFor<CAT, KIND>;
84   DoTotalReduction<CppType>(x, dim, mask, accumulator, intrinsic, terminator);
85   CppType result;
86 #ifdef _MSC_VER // work around MSVC spurious error
87   accumulator.GetResult(&result);
88 #else
89   accumulator.template GetResult(&result);
90 #endif
91   return result;
92 }
93 
94 // For reductions on a dimension, e.g. SUM(array,DIM=2) where the shape
95 // of the array is [2,3,5], the shape of the result is [2,5] and
96 // result(j,k) = SUM(array(j,:,k)), possibly modified if the array has
97 // lower bounds other than one.  This utility subroutine creates an
98 // array of subscripts [j,_,k] for result subscripts [j,k] so that the
99 // elemets of array(j,:,k) can be reduced.
GetExpandedSubscripts(SubscriptValue at[],const Descriptor & descriptor,int zeroBasedDim,const SubscriptValue from[])100 inline void GetExpandedSubscripts(SubscriptValue at[],
101     const Descriptor &descriptor, int zeroBasedDim,
102     const SubscriptValue from[]) {
103   descriptor.GetLowerBounds(at);
104   int rank{descriptor.rank()};
105   int j{0};
106   for (; j < zeroBasedDim; ++j) {
107     at[j] += from[j] - 1 /*lower bound*/;
108   }
109   for (++j; j < rank; ++j) {
110     at[j] += from[j - 1] - 1;
111   }
112 }
113 
114 template <typename TYPE, typename ACCUMULATOR>
ReduceDimToScalar(const Descriptor & x,int zeroBasedDim,SubscriptValue subscripts[],TYPE * result,ACCUMULATOR & accumulator)115 inline void ReduceDimToScalar(const Descriptor &x, int zeroBasedDim,
116     SubscriptValue subscripts[], TYPE *result, ACCUMULATOR &accumulator) {
117   SubscriptValue xAt[maxRank];
118   GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
119   const auto &dim{x.GetDimension(zeroBasedDim)};
120   SubscriptValue at{dim.LowerBound()};
121   for (auto n{dim.Extent()}; n-- > 0; ++at) {
122     xAt[zeroBasedDim] = at;
123     if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
124       break;
125     }
126   }
127 #ifdef _MSC_VER // work around MSVC spurious error
128   accumulator.GetResult(result, zeroBasedDim);
129 #else
130   accumulator.template GetResult(result, zeroBasedDim);
131 #endif
132 }
133 
134 template <typename TYPE, typename ACCUMULATOR>
ReduceDimMaskToScalar(const Descriptor & x,int zeroBasedDim,SubscriptValue subscripts[],const Descriptor & mask,TYPE * result,ACCUMULATOR & accumulator)135 inline void ReduceDimMaskToScalar(const Descriptor &x, int zeroBasedDim,
136     SubscriptValue subscripts[], const Descriptor &mask, TYPE *result,
137     ACCUMULATOR &accumulator) {
138   SubscriptValue xAt[maxRank], maskAt[maxRank];
139   GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
140   GetExpandedSubscripts(maskAt, mask, zeroBasedDim, subscripts);
141   const auto &xDim{x.GetDimension(zeroBasedDim)};
142   SubscriptValue xPos{xDim.LowerBound()};
143   const auto &maskDim{mask.GetDimension(zeroBasedDim)};
144   SubscriptValue maskPos{maskDim.LowerBound()};
145   for (auto n{x.GetDimension(zeroBasedDim).Extent()}; n-- > 0;
146        ++xPos, ++maskPos) {
147     maskAt[zeroBasedDim] = maskPos;
148     if (IsLogicalElementTrue(mask, maskAt)) {
149       xAt[zeroBasedDim] = xPos;
150       if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
151         break;
152       }
153     }
154   }
155 #ifdef _MSC_VER // work around MSVC spurious error
156   accumulator.GetResult(result, zeroBasedDim);
157 #else
158   accumulator.template GetResult(result, zeroBasedDim);
159 #endif
160 }
161 
162 // Utility: establishes & allocates the result array for a partial
163 // reduction (i.e., one with DIM=).
CreatePartialReductionResult(Descriptor & result,const Descriptor & x,int dim,Terminator & terminator,const char * intrinsic,TypeCode typeCode)164 static void CreatePartialReductionResult(Descriptor &result,
165     const Descriptor &x, int dim, Terminator &terminator, const char *intrinsic,
166     TypeCode typeCode) {
167   int xRank{x.rank()};
168   if (dim < 1 || dim > xRank) {
169     terminator.Crash(
170         "%s: bad DIM=%d for ARRAY with rank %d", intrinsic, dim, xRank);
171   }
172   int zeroBasedDim{dim - 1};
173   SubscriptValue resultExtent[maxRank];
174   for (int j{0}; j < zeroBasedDim; ++j) {
175     resultExtent[j] = x.GetDimension(j).Extent();
176   }
177   for (int j{zeroBasedDim + 1}; j < xRank; ++j) {
178     resultExtent[j - 1] = x.GetDimension(j).Extent();
179   }
180   result.Establish(typeCode, x.ElementBytes(), nullptr, xRank - 1, resultExtent,
181       CFI_attribute_allocatable);
182   for (int j{0}; j + 1 < xRank; ++j) {
183     result.GetDimension(j).SetBounds(1, resultExtent[j]);
184   }
185   if (int stat{result.Allocate()}) {
186     terminator.Crash(
187         "%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
188   }
189 }
190 
191 // Partial reductions with DIM=
192 
193 template <typename ACCUMULATOR, TypeCategory CAT, int KIND>
PartialReduction(Descriptor & result,const Descriptor & x,int dim,const Descriptor * mask,Terminator & terminator,const char * intrinsic,ACCUMULATOR & accumulator)194 inline void PartialReduction(Descriptor &result, const Descriptor &x, int dim,
195     const Descriptor *mask, Terminator &terminator, const char *intrinsic,
196     ACCUMULATOR &accumulator) {
197   CreatePartialReductionResult(
198       result, x, dim, terminator, intrinsic, TypeCode{CAT, KIND});
199   SubscriptValue at[maxRank];
200   result.GetLowerBounds(at);
201   INTERNAL_CHECK(result.rank() == 0 || at[0] == 1);
202   using CppType = CppTypeFor<CAT, KIND>;
203   if (mask) {
204     CheckConformability(x, *mask, terminator, intrinsic, "ARRAY", "MASK");
205     SubscriptValue maskAt[maxRank]; // contents unused
206     if (mask->rank() > 0) {
207       for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
208         accumulator.Reinitialize();
209         ReduceDimMaskToScalar<CppType, ACCUMULATOR>(
210             x, dim - 1, at, *mask, result.Element<CppType>(at), accumulator);
211       }
212       return;
213     } else if (!IsLogicalElementTrue(*mask, maskAt)) {
214       // scalar MASK=.FALSE.
215       accumulator.Reinitialize();
216       for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
217         accumulator.GetResult(result.Element<CppType>(at));
218       }
219       return;
220     }
221   }
222   // No MASK= or scalar MASK=.TRUE.
223   for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
224     accumulator.Reinitialize();
225     ReduceDimToScalar<CppType, ACCUMULATOR>(
226         x, dim - 1, at, result.Element<CppType>(at), accumulator);
227   }
228 }
229 
230 template <template <typename> class ACCUM>
231 struct PartialIntegerReductionHelper {
232   template <int KIND> struct Functor {
233     static constexpr int Intermediate{
234         std::max(KIND, 4)}; // use at least "int" for intermediate results
operatorPartialIntegerReductionHelper::Functor235     void operator()(Descriptor &result, const Descriptor &x, int dim,
236         const Descriptor *mask, Terminator &terminator,
237         const char *intrinsic) const {
238       using Accumulator =
239           ACCUM<CppTypeFor<TypeCategory::Integer, Intermediate>>;
240       Accumulator accumulator{x};
241       PartialReduction<Accumulator, TypeCategory::Integer, KIND>(
242           result, x, dim, mask, terminator, intrinsic, accumulator);
243     }
244   };
245 };
246 
247 template <template <typename> class INTEGER_ACCUM>
PartialIntegerReduction(Descriptor & result,const Descriptor & x,int dim,int kind,const Descriptor * mask,const char * intrinsic,Terminator & terminator)248 inline void PartialIntegerReduction(Descriptor &result, const Descriptor &x,
249     int dim, int kind, const Descriptor *mask, const char *intrinsic,
250     Terminator &terminator) {
251   ApplyIntegerKind<
252       PartialIntegerReductionHelper<INTEGER_ACCUM>::template Functor, void>(
253       kind, terminator, result, x, dim, mask, terminator, intrinsic);
254 }
255 
256 template <TypeCategory CAT, template <typename> class ACCUM>
257 struct PartialFloatingReductionHelper {
258   template <int KIND> struct Functor {
259     static constexpr int Intermediate{
260         std::max(KIND, 8)}; // use at least "double" for intermediate results
operatorPartialFloatingReductionHelper::Functor261     void operator()(Descriptor &result, const Descriptor &x, int dim,
262         const Descriptor *mask, Terminator &terminator,
263         const char *intrinsic) const {
264       using Accumulator = ACCUM<CppTypeFor<TypeCategory::Real, Intermediate>>;
265       Accumulator accumulator{x};
266       PartialReduction<Accumulator, CAT, KIND>(
267           result, x, dim, mask, terminator, intrinsic, accumulator);
268     }
269   };
270 };
271 
272 template <template <typename> class INTEGER_ACCUM,
273     template <typename> class REAL_ACCUM,
274     template <typename> class COMPLEX_ACCUM>
TypedPartialNumericReduction(Descriptor & result,const Descriptor & x,int dim,const char * source,int line,const Descriptor * mask,const char * intrinsic)275 inline void TypedPartialNumericReduction(Descriptor &result,
276     const Descriptor &x, int dim, const char *source, int line,
277     const Descriptor *mask, const char *intrinsic) {
278   Terminator terminator{source, line};
279   auto catKind{x.type().GetCategoryAndKind()};
280   RUNTIME_CHECK(terminator, catKind.has_value());
281   switch (catKind->first) {
282   case TypeCategory::Integer:
283     PartialIntegerReduction<INTEGER_ACCUM>(
284         result, x, dim, catKind->second, mask, intrinsic, terminator);
285     break;
286   case TypeCategory::Real:
287     ApplyFloatingPointKind<PartialFloatingReductionHelper<TypeCategory::Real,
288                                REAL_ACCUM>::template Functor,
289         void>(catKind->second, terminator, result, x, dim, mask, terminator,
290         intrinsic);
291     break;
292   case TypeCategory::Complex:
293     ApplyFloatingPointKind<PartialFloatingReductionHelper<TypeCategory::Complex,
294                                COMPLEX_ACCUM>::template Functor,
295         void>(catKind->second, terminator, result, x, dim, mask, terminator,
296         intrinsic);
297     break;
298   default:
299     terminator.Crash("%s: bad type code %d", intrinsic, x.type().raw());
300   }
301 }
302 
303 template <typename ACCUMULATOR> struct LocationResultHelper {
304   template <int KIND> struct Functor {
operatorLocationResultHelper::Functor305     void operator()(ACCUMULATOR &accumulator, const Descriptor &result) const {
306       accumulator.GetResult(
307           result.OffsetElement<CppTypeFor<TypeCategory::Integer, KIND>>());
308     }
309   };
310 };
311 
312 template <typename ACCUMULATOR> struct PartialLocationHelper {
313   template <int KIND> struct Functor {
operatorPartialLocationHelper::Functor314     void operator()(Descriptor &result, const Descriptor &x, int dim,
315         const Descriptor *mask, Terminator &terminator, const char *intrinsic,
316         ACCUMULATOR &accumulator) const {
317       PartialReduction<ACCUMULATOR, TypeCategory::Integer, KIND>(
318           result, x, dim, mask, terminator, intrinsic, accumulator);
319     }
320   };
321 };
322 
323 } // namespace Fortran::runtime
324 #endif // FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_
325