1 //===-- runtime/reduction.cpp ---------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 // Implements ALL, ANY, COUNT, MAXLOC, MAXVAL, MINLOC, MINVAL, PRODUCT, and SUM
10 // for all required operand types and shapes and (for MAXLOC & MINLOC) kinds of
11 // results.
12 //
13 // * Real and complex SUM reductions attempt to reduce floating-point
14 //   cancellation on intermediate results by adding up partial sums
15 //   for positive and negative elements independently.
16 // * Partial reductions (i.e., those with DIM= arguments that are not
17 //   required to be 1 by the rank of the argument) return arrays that
18 //   are dynamically allocated in a caller-supplied descriptor.
19 // * Total reductions (i.e., no DIM= argument) with MAXLOC & MINLOC
20 //   return integer vectors of some kind, not scalars; a caller-supplied
21 //   descriptor is used
22 // * Character-valued reductions (MAXVAL & MINVAL) return arbitrary
23 //   length results, dynamically allocated in a caller-supplied descriptor
24 
25 #include "reduction.h"
26 #include "character.h"
27 #include "cpp-type.h"
28 #include "terminator.h"
29 #include "tools.h"
30 #include "flang/Common/long-double.h"
31 #include <cinttypes>
32 #include <complex>
33 #include <limits>
34 #include <type_traits>
35 
36 namespace Fortran::runtime {
37 
38 // Generic reduction templates
39 
40 // Reductions are implemented with *accumulators*, which are instances of
41 // classes that incrementally build up the result (or an element thereof) during
42 // a traversal of the unmasked elements of an array.  Each accumulator class
43 // supports a constructor (which captures a reference to the array), an
44 // AccumulateAt() member function that applies supplied subscripts to the
45 // array and does something with a scalar element, and a GetResult()
46 // member function that copies a final result into its destination.
47 
48 // Total reduction of the array argument to a scalar (or to a vector in the
49 // cases of MAXLOC & MINLOC).  These are the cases without DIM= or cases
50 // where the argument has rank 1 and DIM=, if present, must be 1.
51 template <typename TYPE, typename ACCUMULATOR>
52 inline void DoTotalReduction(const Descriptor &x, int dim,
53     const Descriptor *mask, ACCUMULATOR &accumulator, const char *intrinsic,
54     Terminator &terminator) {
55   if (dim < 0 || dim > 1) {
56     terminator.Crash(
57         "%s: bad DIM=%d for argument with rank %d", intrinsic, dim, x.rank());
58   }
59   SubscriptValue xAt[maxRank];
60   x.GetLowerBounds(xAt);
61   if (mask) {
62     CheckConformability(x, *mask, terminator, intrinsic, "ARRAY", "MASK");
63     SubscriptValue maskAt[maxRank];
64     mask->GetLowerBounds(maskAt);
65     if (mask->rank() > 0) {
66       for (auto elements{x.Elements()}; elements--;
67            x.IncrementSubscripts(xAt), mask->IncrementSubscripts(maskAt)) {
68         if (IsLogicalElementTrue(*mask, maskAt)) {
69           accumulator.template AccumulateAt<TYPE>(xAt);
70         }
71       }
72       return;
73     } else if (!IsLogicalElementTrue(*mask, maskAt)) {
74       // scalar MASK=.FALSE.: return identity value
75       return;
76     }
77   }
78   // No MASK=, or scalar MASK=.TRUE.
79   for (auto elements{x.Elements()}; elements--; x.IncrementSubscripts(xAt)) {
80     if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
81       break; // cut short, result is known
82     }
83   }
84 }
85 
86 template <TypeCategory CAT, int KIND, typename ACCUMULATOR>
87 inline CppTypeFor<CAT, KIND> GetTotalReduction(const Descriptor &x,
88     const char *source, int line, int dim, const Descriptor *mask,
89     ACCUMULATOR &&accumulator, const char *intrinsic) {
90   Terminator terminator{source, line};
91   RUNTIME_CHECK(terminator, TypeCode(CAT, KIND) == x.type());
92   using CppType = CppTypeFor<CAT, KIND>;
93   DoTotalReduction<CppType>(x, dim, mask, accumulator, intrinsic, terminator);
94   CppType result;
95 #ifdef _MSC_VER // work around MSVC spurious error
96   accumulator.GetResult(&result);
97 #else
98   accumulator.template GetResult(&result);
99 #endif
100   return result;
101 }
102 
103 // For reductions on a dimension, e.g. SUM(array,DIM=2) where the shape
104 // of the array is [2,3,5], the shape of the result is [2,5] and
105 // result(j,k) = SUM(array(j,:,k)), possibly modified if the array has
106 // lower bounds other than one.  This utility subroutine creates an
107 // array of subscripts [j,_,k] for result subscripts [j,k] so that the
108 // elemets of array(j,:,k) can be reduced.
109 inline void GetExpandedSubscripts(SubscriptValue at[],
110     const Descriptor &descriptor, int zeroBasedDim,
111     const SubscriptValue from[]) {
112   descriptor.GetLowerBounds(at);
113   int rank{descriptor.rank()};
114   int j{0};
115   for (; j < zeroBasedDim; ++j) {
116     at[j] += from[j] - 1 /*lower bound*/;
117   }
118   for (++j; j < rank; ++j) {
119     at[j] += from[j - 1] - 1;
120   }
121 }
122 
123 template <typename TYPE, typename ACCUMULATOR>
124 inline void ReduceDimToScalar(const Descriptor &x, int zeroBasedDim,
125     SubscriptValue subscripts[], TYPE *result) {
126   ACCUMULATOR accumulator{x};
127   SubscriptValue xAt[maxRank];
128   GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
129   const auto &dim{x.GetDimension(zeroBasedDim)};
130   SubscriptValue at{dim.LowerBound()};
131   for (auto n{dim.Extent()}; n-- > 0; ++at) {
132     xAt[zeroBasedDim] = at;
133     if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
134       break;
135     }
136   }
137 #ifdef _MSC_VER // work around MSVC spurious error
138   accumulator.GetResult(result, zeroBasedDim);
139 #else
140   accumulator.template GetResult(result, zeroBasedDim);
141 #endif
142 }
143 
144 template <typename TYPE, typename ACCUMULATOR>
145 inline void ReduceDimMaskToScalar(const Descriptor &x, int zeroBasedDim,
146     SubscriptValue subscripts[], const Descriptor &mask, TYPE *result) {
147   ACCUMULATOR accumulator{x};
148   SubscriptValue xAt[maxRank], maskAt[maxRank];
149   GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
150   GetExpandedSubscripts(maskAt, mask, zeroBasedDim, subscripts);
151   const auto &xDim{x.GetDimension(zeroBasedDim)};
152   SubscriptValue xPos{xDim.LowerBound()};
153   const auto &maskDim{mask.GetDimension(zeroBasedDim)};
154   SubscriptValue maskPos{maskDim.LowerBound()};
155   for (auto n{x.GetDimension(zeroBasedDim).Extent()}; n-- > 0;
156        ++xPos, ++maskPos) {
157     maskAt[zeroBasedDim] = maskPos;
158     if (IsLogicalElementTrue(mask, maskAt)) {
159       xAt[zeroBasedDim] = xPos;
160       if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
161         break;
162       }
163     }
164   }
165 #ifdef _MSC_VER // work around MSVC spurious error
166   accumulator.GetResult(result, zeroBasedDim);
167 #else
168   accumulator.template GetResult(result, zeroBasedDim);
169 #endif
170 }
171 
172 // Utility: establishes & allocates the result array for a partial
173 // reduction (i.e., one with DIM=).
174 static void CreatePartialReductionResult(Descriptor &result,
175     const Descriptor &x, int dim, Terminator &terminator, const char *intrinsic,
176     TypeCode typeCode) {
177   int xRank{x.rank()};
178   if (dim < 1 || dim > xRank) {
179     terminator.Crash("%s: bad DIM=%d for rank %d", intrinsic, dim, xRank);
180   }
181   int zeroBasedDim{dim - 1};
182   SubscriptValue resultExtent[maxRank];
183   for (int j{0}; j < zeroBasedDim; ++j) {
184     resultExtent[j] = x.GetDimension(j).Extent();
185   }
186   for (int j{zeroBasedDim + 1}; j < xRank; ++j) {
187     resultExtent[j - 1] = x.GetDimension(j).Extent();
188   }
189   result.Establish(typeCode, x.ElementBytes(), nullptr, xRank - 1, resultExtent,
190       CFI_attribute_allocatable);
191   for (int j{0}; j + 1 < xRank; ++j) {
192     result.GetDimension(j).SetBounds(1, resultExtent[j]);
193   }
194   if (int stat{result.Allocate()}) {
195     terminator.Crash(
196         "%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
197   }
198 }
199 
200 // Partial reductions with DIM=
201 
202 template <typename ACCUMULATOR, TypeCategory CAT, int KIND>
203 inline void PartialReduction(Descriptor &result, const Descriptor &x, int dim,
204     const Descriptor *mask, Terminator &terminator, const char *intrinsic) {
205   CreatePartialReductionResult(
206       result, x, dim, terminator, intrinsic, TypeCode{CAT, KIND});
207   SubscriptValue at[maxRank];
208   result.GetLowerBounds(at);
209   INTERNAL_CHECK(at[0] == 1);
210   using CppType = CppTypeFor<CAT, KIND>;
211   if (mask) {
212     CheckConformability(x, *mask, terminator, intrinsic, "ARRAY", "MASK");
213     SubscriptValue maskAt[maxRank]; // contents unused
214     if (mask->rank() > 0) {
215       for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
216         ReduceDimMaskToScalar<CppType, ACCUMULATOR>(
217             x, dim - 1, at, *mask, result.Element<CppType>(at));
218       }
219       return;
220     } else if (!IsLogicalElementTrue(*mask, maskAt)) {
221       // scalar MASK=.FALSE.
222       ACCUMULATOR accumulator{x};
223       for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
224         accumulator.GetResult(result.Element<CppType>(at));
225       }
226       return;
227     }
228   }
229   // No MASK= or scalar MASK=.TRUE.
230   for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
231     ReduceDimToScalar<CppType, ACCUMULATOR>(
232         x, dim - 1, at, result.Element<CppType>(at));
233   }
234 }
235 
236 template <template <typename> class INTEGER_ACCUM,
237     template <typename> class REAL_ACCUM,
238     template <typename> class COMPLEX_ACCUM>
239 inline void TypedPartialNumericReduction(Descriptor &result,
240     const Descriptor &x, int dim, const char *source, int line,
241     const Descriptor *mask, const char *intrinsic) {
242   Terminator terminator{source, line};
243   auto catKind{x.type().GetCategoryAndKind()};
244   RUNTIME_CHECK(terminator, catKind.has_value());
245   switch (catKind->first) {
246   case TypeCategory::Integer:
247     switch (catKind->second) {
248     case 1:
249       PartialReduction<INTEGER_ACCUM<CppTypeFor<TypeCategory::Integer, 4>>,
250           TypeCategory::Integer, 1>(
251           result, x, dim, mask, terminator, intrinsic);
252       return;
253     case 2:
254       PartialReduction<INTEGER_ACCUM<CppTypeFor<TypeCategory::Integer, 4>>,
255           TypeCategory::Integer, 2>(
256           result, x, dim, mask, terminator, intrinsic);
257       return;
258     case 4:
259       PartialReduction<INTEGER_ACCUM<CppTypeFor<TypeCategory::Integer, 4>>,
260           TypeCategory::Integer, 4>(
261           result, x, dim, mask, terminator, intrinsic);
262       return;
263     case 8:
264       PartialReduction<INTEGER_ACCUM<CppTypeFor<TypeCategory::Integer, 8>>,
265           TypeCategory::Integer, 8>(
266           result, x, dim, mask, terminator, intrinsic);
267       return;
268 #ifdef __SIZEOF_INT128__
269     case 16:
270       PartialReduction<INTEGER_ACCUM<CppTypeFor<TypeCategory::Integer, 16>>,
271           TypeCategory::Integer, 16>(
272           result, x, dim, mask, terminator, intrinsic);
273       return;
274 #endif
275     }
276     break;
277   case TypeCategory::Real:
278     switch (catKind->second) {
279 #if 0 // TODO
280     case 2:
281     case 3:
282 #endif
283     case 4:
284       PartialReduction<REAL_ACCUM<CppTypeFor<TypeCategory::Real, 8>>,
285           TypeCategory::Real, 4>(result, x, dim, mask, terminator, intrinsic);
286       return;
287     case 8:
288       PartialReduction<REAL_ACCUM<CppTypeFor<TypeCategory::Real, 8>>,
289           TypeCategory::Real, 8>(result, x, dim, mask, terminator, intrinsic);
290       return;
291 #if LONG_DOUBLE == 80
292     case 10:
293       PartialReduction<REAL_ACCUM<CppTypeFor<TypeCategory::Real, 10>>,
294           TypeCategory::Real, 10>(result, x, dim, mask, terminator, intrinsic);
295       return;
296 #elif LONG_DOUBLE == 128
297     case 16:
298       PartialReduction<REAL_ACCUM<CppTypeFor<TypeCategory::Real, 16>>,
299           TypeCategory::Real, 16>(result, x, dim, mask, terminator, intrinsic);
300       return;
301 #endif
302     }
303     break;
304   case TypeCategory::Complex:
305     switch (catKind->second) {
306 #if 0 // TODO
307     case 2:
308     case 3:
309 #endif
310     case 4:
311       PartialReduction<COMPLEX_ACCUM<CppTypeFor<TypeCategory::Real, 8>>,
312           TypeCategory::Complex, 4>(
313           result, x, dim, mask, terminator, intrinsic);
314       return;
315     case 8:
316       PartialReduction<COMPLEX_ACCUM<CppTypeFor<TypeCategory::Real, 8>>,
317           TypeCategory::Complex, 8>(
318           result, x, dim, mask, terminator, intrinsic);
319       return;
320 #if LONG_DOUBLE == 80
321     case 10:
322       PartialReduction<COMPLEX_ACCUM<CppTypeFor<TypeCategory::Real, 10>>,
323           TypeCategory::Complex, 10>(
324           result, x, dim, mask, terminator, intrinsic);
325       return;
326 #elif LONG_DOUBLE == 128
327     case 16:
328       PartialReduction<COMPLEX_ACCUM<CppTypeFor<TypeCategory::Real, 16>>,
329           TypeCategory::Complex, 16>(
330           result, x, dim, mask, terminator, intrinsic);
331       return;
332 #endif
333     }
334     break;
335   default:
336     break;
337   }
338   terminator.Crash("%s: invalid type code %d", intrinsic, x.type().raw());
339 }
340 
341 // SUM()
342 
343 template <typename INTERMEDIATE> class IntegerSumAccumulator {
344 public:
345   explicit IntegerSumAccumulator(const Descriptor &array) : array_{array} {}
346   template <typename A> void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
347     *p = static_cast<A>(sum_);
348   }
349   template <typename A> bool AccumulateAt(const SubscriptValue at[]) {
350     sum_ += *array_.Element<A>(at);
351     return true;
352   }
353 
354 private:
355   const Descriptor &array_;
356   INTERMEDIATE sum_{0};
357 };
358 
359 template <typename INTERMEDIATE> class RealSumAccumulator {
360 public:
361   explicit RealSumAccumulator(const Descriptor &array) : array_{array} {}
362   template <typename A> A Result() const {
363     auto sum{static_cast<A>(positives_ + negatives_)};
364     return std::isfinite(sum) ? sum : static_cast<A>(inOrder_);
365   }
366   template <typename A> void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
367     *p = Result<A>();
368   }
369   template <typename A> bool Accumulate(A x) {
370     // Accumulate the nonnegative and negative elements independently
371     // to reduce cancellation; also record an in-order sum for use
372     // in case of overflow.
373     if (x >= 0) {
374       positives_ += x;
375     } else {
376       negatives_ += x;
377     }
378     inOrder_ += x;
379     return true;
380   }
381   template <typename A> bool AccumulateAt(const SubscriptValue at[]) {
382     return Accumulate(*array_.Element<A>(at));
383   }
384 
385 private:
386   const Descriptor &array_;
387   INTERMEDIATE positives_{0.0}, negatives_{0.0}, inOrder_{0.0};
388 };
389 
390 template <typename PART> class ComplexSumAccumulator {
391 public:
392   explicit ComplexSumAccumulator(const Descriptor &array) : array_{array} {}
393   template <typename A> void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
394     using ResultPart = typename A::value_type;
395     *p = {reals_.template Result<ResultPart>(),
396         imaginaries_.template Result<ResultPart>()};
397   }
398   template <typename A> bool Accumulate(const A &z) {
399     reals_.Accumulate(z.real());
400     imaginaries_.Accumulate(z.imag());
401     return true;
402   }
403   template <typename A> bool AccumulateAt(const SubscriptValue at[]) {
404     return Accumulate(*array_.Element<A>(at));
405   }
406 
407 private:
408   const Descriptor &array_;
409   RealSumAccumulator<PART> reals_{array_}, imaginaries_{array_};
410 };
411 
412 extern "C" {
413 CppTypeFor<TypeCategory::Integer, 1> RTNAME(SumInteger1)(const Descriptor &x,
414     const char *source, int line, int dim, const Descriptor *mask) {
415   return GetTotalReduction<TypeCategory::Integer, 1>(x, source, line, dim, mask,
416       IntegerSumAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "SUM");
417 }
418 CppTypeFor<TypeCategory::Integer, 2> RTNAME(SumInteger2)(const Descriptor &x,
419     const char *source, int line, int dim, const Descriptor *mask) {
420   return GetTotalReduction<TypeCategory::Integer, 2>(x, source, line, dim, mask,
421       IntegerSumAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "SUM");
422 }
423 CppTypeFor<TypeCategory::Integer, 4> RTNAME(SumInteger4)(const Descriptor &x,
424     const char *source, int line, int dim, const Descriptor *mask) {
425   return GetTotalReduction<TypeCategory::Integer, 4>(x, source, line, dim, mask,
426       IntegerSumAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x}, "SUM");
427 }
428 CppTypeFor<TypeCategory::Integer, 8> RTNAME(SumInteger8)(const Descriptor &x,
429     const char *source, int line, int dim, const Descriptor *mask) {
430   return GetTotalReduction<TypeCategory::Integer, 8>(x, source, line, dim, mask,
431       IntegerSumAccumulator<CppTypeFor<TypeCategory::Integer, 8>>{x}, "SUM");
432 }
433 #ifdef __SIZEOF_INT128__
434 CppTypeFor<TypeCategory::Integer, 16> RTNAME(SumInteger16)(const Descriptor &x,
435     const char *source, int line, int dim, const Descriptor *mask) {
436   return GetTotalReduction<TypeCategory::Integer, 16>(x, source, line, dim,
437       mask, IntegerSumAccumulator<CppTypeFor<TypeCategory::Integer, 16>>{x},
438       "SUM");
439 }
440 #endif
441 
442 // TODO: real/complex(2 & 3)
443 CppTypeFor<TypeCategory::Real, 4> RTNAME(SumReal4)(const Descriptor &x,
444     const char *source, int line, int dim, const Descriptor *mask) {
445   return GetTotalReduction<TypeCategory::Real, 4>(
446       x, source, line, dim, mask, RealSumAccumulator<double>{x}, "SUM");
447 }
448 CppTypeFor<TypeCategory::Real, 8> RTNAME(SumReal8)(const Descriptor &x,
449     const char *source, int line, int dim, const Descriptor *mask) {
450   return GetTotalReduction<TypeCategory::Real, 8>(
451       x, source, line, dim, mask, RealSumAccumulator<double>{x}, "SUM");
452 }
453 #if LONG_DOUBLE == 80
454 CppTypeFor<TypeCategory::Real, 10> RTNAME(SumReal10)(const Descriptor &x,
455     const char *source, int line, int dim, const Descriptor *mask) {
456   return GetTotalReduction<TypeCategory::Real, 10>(
457       x, source, line, dim, mask, RealSumAccumulator<long double>{x}, "SUM");
458 }
459 #elif LONG_DOUBLE == 128
460 CppTypeFor<TypeCategory::Real, 16> RTNAME(SumReal16)(const Descriptor &x,
461     const char *source, int line, int dim, const Descriptor *mask) {
462   return GetTotalReduction<TypeCategory::Real, 16>(
463       x, source, line, dim, mask, RealSumAccumulator<long double>{x}, "SUM");
464 }
465 #endif
466 
467 void RTNAME(CppSumComplex4)(CppTypeFor<TypeCategory::Complex, 4> &result,
468     const Descriptor &x, const char *source, int line, int dim,
469     const Descriptor *mask) {
470   result = GetTotalReduction<TypeCategory::Complex, 4>(
471       x, source, line, dim, mask, ComplexSumAccumulator<double>{x}, "SUM");
472 }
473 void RTNAME(CppSumComplex8)(CppTypeFor<TypeCategory::Complex, 8> &result,
474     const Descriptor &x, const char *source, int line, int dim,
475     const Descriptor *mask) {
476   result = GetTotalReduction<TypeCategory::Complex, 8>(
477       x, source, line, dim, mask, ComplexSumAccumulator<double>{x}, "SUM");
478 }
479 #if LONG_DOUBLE == 80
480 void RTNAME(CppSumComplex10)(CppTypeFor<TypeCategory::Complex, 10> &result,
481     const Descriptor &x, const char *source, int line, int dim,
482     const Descriptor *mask) {
483   result = GetTotalReduction<TypeCategory::Complex, 10>(
484       x, source, line, dim, mask, ComplexSumAccumulator<long double>{x}, "SUM");
485 }
486 #elif LONG_DOUBLE == 128
487 void RTNAME(CppSumComplex16)(CppTypeFor<TypeCategory::Complex, 16> &result,
488     const Descriptor &x, const char *source, int line, int dim,
489     const Descriptor *mask) {
490   result = GetTotalReduction<TypeCategory::Complex, 16>(
491       x, source, line, dim, mask, ComplexSumAccumulator<long double>{x}, "SUM");
492 }
493 #endif
494 
495 void RTNAME(SumDim)(Descriptor &result, const Descriptor &x, int dim,
496     const char *source, int line, const Descriptor *mask) {
497   TypedPartialNumericReduction<IntegerSumAccumulator, RealSumAccumulator,
498       ComplexSumAccumulator>(result, x, dim, source, line, mask, "SUM");
499 }
500 } // extern "C"
501 
502 // PRODUCT()
503 
504 template <typename INTERMEDIATE> class NonComplexProductAccumulator {
505 public:
506   explicit NonComplexProductAccumulator(const Descriptor &array)
507       : array_{array} {}
508   template <typename A> void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
509     *p = static_cast<A>(product_);
510   }
511   template <typename A> bool AccumulateAt(const SubscriptValue at[]) {
512     product_ *= *array_.Element<A>(at);
513     return product_ != 0;
514   }
515 
516 private:
517   const Descriptor &array_;
518   INTERMEDIATE product_{1};
519 };
520 
521 template <typename PART> class ComplexProductAccumulator {
522 public:
523   explicit ComplexProductAccumulator(const Descriptor &array) : array_{array} {}
524   template <typename A> void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
525     using ResultPart = typename A::value_type;
526     *p = {static_cast<ResultPart>(product_.real()),
527         static_cast<ResultPart>(product_.imag())};
528   }
529   template <typename A> bool AccumulateAt(const SubscriptValue at[]) {
530     product_ *= *array_.Element<A>(at);
531     return true;
532   }
533 
534 private:
535   const Descriptor &array_;
536   std::complex<PART> product_{1, 0};
537 };
538 
539 extern "C" {
540 CppTypeFor<TypeCategory::Integer, 1> RTNAME(ProductInteger1)(
541     const Descriptor &x, const char *source, int line, int dim,
542     const Descriptor *mask) {
543   return GetTotalReduction<TypeCategory::Integer, 1>(x, source, line, dim, mask,
544       NonComplexProductAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x},
545       "PRODUCT");
546 }
547 CppTypeFor<TypeCategory::Integer, 2> RTNAME(ProductInteger2)(
548     const Descriptor &x, const char *source, int line, int dim,
549     const Descriptor *mask) {
550   return GetTotalReduction<TypeCategory::Integer, 2>(x, source, line, dim, mask,
551       NonComplexProductAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x},
552       "PRODUCT");
553 }
554 CppTypeFor<TypeCategory::Integer, 4> RTNAME(ProductInteger4)(
555     const Descriptor &x, const char *source, int line, int dim,
556     const Descriptor *mask) {
557   return GetTotalReduction<TypeCategory::Integer, 4>(x, source, line, dim, mask,
558       NonComplexProductAccumulator<CppTypeFor<TypeCategory::Integer, 4>>{x},
559       "PRODUCT");
560 }
561 CppTypeFor<TypeCategory::Integer, 8> RTNAME(ProductInteger8)(
562     const Descriptor &x, const char *source, int line, int dim,
563     const Descriptor *mask) {
564   return GetTotalReduction<TypeCategory::Integer, 8>(x, source, line, dim, mask,
565       NonComplexProductAccumulator<CppTypeFor<TypeCategory::Integer, 8>>{x},
566       "PRODUCT");
567 }
568 #ifdef __SIZEOF_INT128__
569 CppTypeFor<TypeCategory::Integer, 16> RTNAME(ProductInteger16)(
570     const Descriptor &x, const char *source, int line, int dim,
571     const Descriptor *mask) {
572   return GetTotalReduction<TypeCategory::Integer, 16>(x, source, line, dim,
573       mask,
574       NonComplexProductAccumulator<CppTypeFor<TypeCategory::Integer, 16>>{x},
575       "PRODUCT");
576 }
577 #endif
578 
579 // TODO: real/complex(2 & 3)
580 CppTypeFor<TypeCategory::Real, 4> RTNAME(ProductReal4)(const Descriptor &x,
581     const char *source, int line, int dim, const Descriptor *mask) {
582   return GetTotalReduction<TypeCategory::Real, 4>(x, source, line, dim, mask,
583       NonComplexProductAccumulator<CppTypeFor<TypeCategory::Real, 8>>{x},
584       "PRODUCT");
585 }
586 CppTypeFor<TypeCategory::Real, 8> RTNAME(ProductReal8)(const Descriptor &x,
587     const char *source, int line, int dim, const Descriptor *mask) {
588   return GetTotalReduction<TypeCategory::Real, 8>(x, source, line, dim, mask,
589       NonComplexProductAccumulator<CppTypeFor<TypeCategory::Real, 8>>{x},
590       "PRODUCT");
591 }
592 #if LONG_DOUBLE == 80
593 CppTypeFor<TypeCategory::Real, 10> RTNAME(ProductReal10)(const Descriptor &x,
594     const char *source, int line, int dim, const Descriptor *mask) {
595   return GetTotalReduction<TypeCategory::Real, 10>(x, source, line, dim, mask,
596       NonComplexProductAccumulator<CppTypeFor<TypeCategory::Real, 10>>{x},
597       "PRODUCT");
598 }
599 #elif LONG_DOUBLE == 128
600 CppTypeFor<TypeCategory::Real, 16> RTNAME(ProductReal16)(const Descriptor &x,
601     const char *source, int line, int dim, const Descriptor *mask) {
602   return GetTotalReduction<TypeCategory::Real, 16>(x, source, line, dim, mask,
603       NonComplexProductAccumulator<CppTypeFor<TypeCategory::Real, 16>>{x},
604       "PRODUCT");
605 }
606 #endif
607 
608 void RTNAME(CppProductComplex4)(CppTypeFor<TypeCategory::Complex, 4> &result,
609     const Descriptor &x, const char *source, int line, int dim,
610     const Descriptor *mask) {
611   result = GetTotalReduction<TypeCategory::Complex, 4>(x, source, line, dim,
612       mask, ComplexProductAccumulator<CppTypeFor<TypeCategory::Real, 8>>{x},
613       "PRODUCT");
614 }
615 void RTNAME(CppProductComplex8)(CppTypeFor<TypeCategory::Complex, 8> &result,
616     const Descriptor &x, const char *source, int line, int dim,
617     const Descriptor *mask) {
618   result = GetTotalReduction<TypeCategory::Complex, 8>(x, source, line, dim,
619       mask, ComplexProductAccumulator<CppTypeFor<TypeCategory::Real, 8>>{x},
620       "PRODUCT");
621 }
622 #if LONG_DOUBLE == 80
623 void RTNAME(CppProductComplex10)(CppTypeFor<TypeCategory::Complex, 10> &result,
624     const Descriptor &x, const char *source, int line, int dim,
625     const Descriptor *mask) {
626   result = GetTotalReduction<TypeCategory::Complex, 10>(x, source, line, dim,
627       mask, ComplexProductAccumulator<CppTypeFor<TypeCategory::Real, 10>>{x},
628       "PRODUCT");
629 }
630 #elif LONG_DOUBLE == 128
631 void RTNAME(CppProductComplex16)(CppTypeFor<TypeCategory::Complex, 16> &result,
632     const Descriptor &x, const char *source, int line, int dim,
633     const Descriptor *mask) {
634   result = GetTotalReduction<TypeCategory::Complex, 16>(x, source, line, dim,
635       mask, ComplexProductAccumulator<CppTypeFor<TypeCategory::Real, 16>>{x},
636       "PRODUCT");
637 }
638 #endif
639 
640 void RTNAME(ProductDim)(Descriptor &result, const Descriptor &x, int dim,
641     const char *source, int line, const Descriptor *mask) {
642   TypedPartialNumericReduction<NonComplexProductAccumulator,
643       NonComplexProductAccumulator, ComplexProductAccumulator>(
644       result, x, dim, source, line, mask, "PRODUCT");
645 }
646 } // extern "C"
647 
648 // MAXLOC and MINLOC
649 
650 template <typename T, bool IS_MAX, bool BACK> struct NumericCompare {
651   using Type = T;
652   explicit NumericCompare(std::size_t /*elemLen; ignored*/) {}
653   bool operator()(const T &value, const T &previous) {
654     if (BACK && value == previous) {
655       return true;
656     } else if constexpr (IS_MAX) {
657       return value > previous;
658     } else {
659       return value < previous;
660     }
661   }
662 };
663 
664 template <typename T, bool IS_MAX, bool BACK> class CharacterCompare {
665 public:
666   using Type = T;
667   explicit CharacterCompare(std::size_t elemLen)
668       : chars_{elemLen / sizeof(T)} {}
669   bool operator()(const T &value, const T &previous) {
670     int cmp{CharacterScalarCompare<T>(&value, &previous, chars_, chars_)};
671     if (BACK && cmp == 0) {
672       return true;
673     } else if constexpr (IS_MAX) {
674       return cmp > 0;
675     } else {
676       return cmp < 0;
677     }
678   }
679 
680 private:
681   std::size_t chars_;
682 };
683 
684 template <typename COMPARE> class ExtremumLocAccumulator {
685 public:
686   using Type = typename COMPARE::Type;
687   ExtremumLocAccumulator(const Descriptor &array, std::size_t chars = 0)
688       : array_{array}, argRank_{array.rank()}, compare_{array.ElementBytes()} {
689     // per standard: result indices are all zero if no data
690     for (int j{0}; j < argRank_; ++j) {
691       extremumLoc_[j] = 0;
692     }
693   }
694   int argRank() const { return argRank_; }
695   template <typename A> void GetResult(A *p, int zeroBasedDim = -1) {
696     if (zeroBasedDim >= 0) {
697       *p = extremumLoc_[zeroBasedDim];
698     } else {
699       for (int j{0}; j < argRank_; ++j) {
700         p[j] = extremumLoc_[j];
701       }
702     }
703   }
704   template <typename IGNORED> bool AccumulateAt(const SubscriptValue at[]) {
705     const auto &value{*array_.Element<Type>(at)};
706     if (!previous_ || compare_(value, *previous_)) {
707       previous_ = &value;
708       for (int j{0}; j < argRank_; ++j) {
709         extremumLoc_[j] = at[j];
710       }
711     }
712     return true;
713   }
714 
715 private:
716   const Descriptor &array_;
717   int argRank_;
718   SubscriptValue extremumLoc_[maxRank];
719   const Type *previous_{nullptr};
720   COMPARE compare_;
721 };
722 
723 template <typename COMPARE, typename CPPTYPE>
724 static void DoMaxOrMinLocHelper(const char *intrinsic, Descriptor &result,
725     const Descriptor &x, int kind, const Descriptor *mask,
726     Terminator &terminator) {
727   ExtremumLocAccumulator<COMPARE> accumulator{x};
728   DoTotalReduction<CPPTYPE>(x, 0, mask, accumulator, intrinsic, terminator);
729   switch (kind) {
730   case 1:
731     accumulator.GetResult(
732         result.OffsetElement<CppTypeFor<TypeCategory::Integer, 1>>());
733     break;
734   case 2:
735     accumulator.GetResult(
736         result.OffsetElement<CppTypeFor<TypeCategory::Integer, 2>>());
737     break;
738   case 4:
739     accumulator.GetResult(
740         result.OffsetElement<CppTypeFor<TypeCategory::Integer, 4>>());
741     break;
742   case 8:
743     accumulator.GetResult(
744         result.OffsetElement<CppTypeFor<TypeCategory::Integer, 8>>());
745     break;
746 #ifdef __SIZEOF_INT128__
747   case 16:
748     accumulator.GetResult(
749         result.OffsetElement<CppTypeFor<TypeCategory::Integer, 16>>());
750     break;
751 #endif
752   default:
753     terminator.Crash("%s: bad KIND=%d", intrinsic, kind);
754   }
755 }
756 
757 template <TypeCategory CAT, int KIND, bool IS_MAX,
758     template <typename, bool, bool> class COMPARE>
759 inline void DoMaxOrMinLoc(const char *intrinsic, Descriptor &result,
760     const Descriptor &x, int kind, const char *source, int line,
761     const Descriptor *mask, bool back) {
762   using CppType = CppTypeFor<CAT, KIND>;
763   Terminator terminator{source, line};
764   if (back) {
765     DoMaxOrMinLocHelper<COMPARE<CppType, IS_MAX, true>, CppType>(
766         intrinsic, result, x, kind, mask, terminator);
767   } else {
768     DoMaxOrMinLocHelper<COMPARE<CppType, IS_MAX, false>, CppType>(
769         intrinsic, result, x, kind, mask, terminator);
770   }
771 }
772 
773 template <bool IS_MAX>
774 inline void TypedMaxOrMinLoc(const char *intrinsic, Descriptor &result,
775     const Descriptor &x, int kind, const char *source, int line,
776     const Descriptor *mask, bool back) {
777   int rank{x.rank()};
778   SubscriptValue extent[1]{rank};
779   result.Establish(TypeCategory::Integer, kind, nullptr, 1, extent,
780       CFI_attribute_allocatable);
781   result.GetDimension(0).SetBounds(1, extent[0]);
782   Terminator terminator{source, line};
783   if (int stat{result.Allocate()}) {
784     terminator.Crash(
785         "%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
786   }
787   CheckIntegerKind(terminator, kind, intrinsic);
788   auto catKind{x.type().GetCategoryAndKind()};
789   RUNTIME_CHECK(terminator, catKind.has_value());
790   switch (catKind->first) {
791   case TypeCategory::Integer:
792     switch (catKind->second) {
793     case 1:
794       DoMaxOrMinLoc<TypeCategory::Integer, 1, IS_MAX, NumericCompare>(
795           intrinsic, result, x, kind, source, line, mask, back);
796       return;
797     case 2:
798       DoMaxOrMinLoc<TypeCategory::Integer, 2, IS_MAX, NumericCompare>(
799           intrinsic, result, x, kind, source, line, mask, back);
800       return;
801     case 4:
802       DoMaxOrMinLoc<TypeCategory::Integer, 4, IS_MAX, NumericCompare>(
803           intrinsic, result, x, kind, source, line, mask, back);
804       return;
805     case 8:
806       DoMaxOrMinLoc<TypeCategory::Integer, 8, IS_MAX, NumericCompare>(
807           intrinsic, result, x, kind, source, line, mask, back);
808       return;
809 #ifdef __SIZEOF_INT128__
810     case 16:
811       DoMaxOrMinLoc<TypeCategory::Integer, 16, IS_MAX, NumericCompare>(
812           intrinsic, result, x, kind, source, line, mask, back);
813       return;
814 #endif
815     }
816     break;
817   case TypeCategory::Real:
818     switch (catKind->second) {
819     // TODO: REAL(2 & 3)
820     case 4:
821       DoMaxOrMinLoc<TypeCategory::Real, 4, IS_MAX, NumericCompare>(
822           intrinsic, result, x, kind, source, line, mask, back);
823       return;
824     case 8:
825       DoMaxOrMinLoc<TypeCategory::Real, 8, IS_MAX, NumericCompare>(
826           intrinsic, result, x, kind, source, line, mask, back);
827       return;
828 #if LONG_DOUBLE == 80
829     case 10:
830       DoMaxOrMinLoc<TypeCategory::Real, 10, IS_MAX, NumericCompare>(
831           intrinsic, result, x, kind, source, line, mask, back);
832       return;
833 #elif LONG_DOUBLE == 128
834     case 16:
835       DoMaxOrMinLoc<TypeCategory::Real, 16, IS_MAX, NumericCompare>(
836           intrinsic, result, x, kind, source, line, mask, back);
837       return;
838 #endif
839     }
840     break;
841   case TypeCategory::Character:
842     switch (catKind->second) {
843     case 1:
844       DoMaxOrMinLoc<TypeCategory::Character, 1, IS_MAX, CharacterCompare>(
845           intrinsic, result, x, kind, source, line, mask, back);
846       return;
847     case 2:
848       DoMaxOrMinLoc<TypeCategory::Character, 2, IS_MAX, CharacterCompare>(
849           intrinsic, result, x, kind, source, line, mask, back);
850       return;
851     case 4:
852       DoMaxOrMinLoc<TypeCategory::Character, 4, IS_MAX, CharacterCompare>(
853           intrinsic, result, x, kind, source, line, mask, back);
854       return;
855     }
856     break;
857   default:
858     break;
859   }
860   terminator.Crash(
861       "%s: Bad data type code (%d) for array", intrinsic, x.type().raw());
862 }
863 
864 extern "C" {
865 void RTNAME(Maxloc)(Descriptor &result, const Descriptor &x, int kind,
866     const char *source, int line, const Descriptor *mask, bool back) {
867   TypedMaxOrMinLoc<true>("MAXLOC", result, x, kind, source, line, mask, back);
868 }
869 void RTNAME(Minloc)(Descriptor &result, const Descriptor &x, int kind,
870     const char *source, int line, const Descriptor *mask, bool back) {
871   TypedMaxOrMinLoc<false>("MINLOC", result, x, kind, source, line, mask, back);
872 }
873 } // extern "C"
874 
875 // MAXLOC/MINLOC with DIM=
876 
877 template <TypeCategory CAT, int KIND, bool IS_MAX,
878     template <typename, bool, bool> class COMPARE, bool BACK>
879 static void DoPartialMaxOrMinLocDirection(const char *intrinsic,
880     Descriptor &result, const Descriptor &x, int kind, int dim,
881     const Descriptor *mask, Terminator &terminator) {
882   using CppType = CppTypeFor<CAT, KIND>;
883   switch (kind) {
884   case 1:
885     PartialReduction<ExtremumLocAccumulator<COMPARE<CppType, IS_MAX, BACK>>,
886         TypeCategory::Integer, 1>(result, x, dim, mask, terminator, intrinsic);
887     break;
888   case 2:
889     PartialReduction<ExtremumLocAccumulator<COMPARE<CppType, IS_MAX, BACK>>,
890         TypeCategory::Integer, 2>(result, x, dim, mask, terminator, intrinsic);
891     break;
892   case 4:
893     PartialReduction<ExtremumLocAccumulator<COMPARE<CppType, IS_MAX, BACK>>,
894         TypeCategory::Integer, 4>(result, x, dim, mask, terminator, intrinsic);
895     break;
896   case 8:
897     PartialReduction<ExtremumLocAccumulator<COMPARE<CppType, IS_MAX, BACK>>,
898         TypeCategory::Integer, 8>(result, x, dim, mask, terminator, intrinsic);
899     break;
900 #ifdef __SIZEOF_INT128__
901   case 16:
902     PartialReduction<ExtremumLocAccumulator<COMPARE<CppType, IS_MAX, BACK>>,
903         TypeCategory::Integer, 16>(result, x, dim, mask, terminator, intrinsic);
904     break;
905 #endif
906   default:
907     terminator.Crash("%s: bad KIND=%d", intrinsic, kind);
908   }
909 }
910 
911 template <TypeCategory CAT, int KIND, bool IS_MAX,
912     template <typename, bool, bool> class COMPARE>
913 inline void DoPartialMaxOrMinLoc(const char *intrinsic, Descriptor &result,
914     const Descriptor &x, int kind, int dim, const Descriptor *mask, bool back,
915     Terminator &terminator) {
916   if (back) {
917     DoPartialMaxOrMinLocDirection<CAT, KIND, IS_MAX, COMPARE, true>(
918         intrinsic, result, x, kind, dim, mask, terminator);
919   } else {
920     DoPartialMaxOrMinLocDirection<CAT, KIND, IS_MAX, COMPARE, false>(
921         intrinsic, result, x, kind, dim, mask, terminator);
922   }
923 }
924 
925 template <bool IS_MAX>
926 inline void TypedPartialMaxOrMinLoc(const char *intrinsic, Descriptor &result,
927     const Descriptor &x, int kind, int dim, const char *source, int line,
928     const Descriptor *mask, bool back) {
929   Terminator terminator{source, line};
930   CheckIntegerKind(terminator, kind, intrinsic);
931   auto catKind{x.type().GetCategoryAndKind()};
932   RUNTIME_CHECK(terminator, catKind.has_value());
933   switch (catKind->first) {
934   case TypeCategory::Integer:
935     switch (catKind->second) {
936     case 1:
937       DoPartialMaxOrMinLoc<TypeCategory::Integer, 1, IS_MAX, NumericCompare>(
938           intrinsic, result, x, kind, dim, mask, back, terminator);
939       return;
940     case 2:
941       DoPartialMaxOrMinLoc<TypeCategory::Integer, 2, IS_MAX, NumericCompare>(
942           intrinsic, result, x, kind, dim, mask, back, terminator);
943       return;
944     case 4:
945       DoPartialMaxOrMinLoc<TypeCategory::Integer, 4, IS_MAX, NumericCompare>(
946           intrinsic, result, x, kind, dim, mask, back, terminator);
947       return;
948     case 8:
949       DoPartialMaxOrMinLoc<TypeCategory::Integer, 8, IS_MAX, NumericCompare>(
950           intrinsic, result, x, kind, dim, mask, back, terminator);
951       return;
952 #ifdef __SIZEOF_INT128__
953     case 16:
954       DoPartialMaxOrMinLoc<TypeCategory::Integer, 16, IS_MAX, NumericCompare>(
955           intrinsic, result, x, kind, dim, mask, back, terminator);
956       return;
957 #endif
958     }
959     break;
960   case TypeCategory::Real:
961     switch (catKind->second) {
962     // TODO: REAL(2 & 3)
963     case 4:
964       DoPartialMaxOrMinLoc<TypeCategory::Real, 4, IS_MAX, NumericCompare>(
965           intrinsic, result, x, kind, dim, mask, back, terminator);
966       return;
967     case 8:
968       DoPartialMaxOrMinLoc<TypeCategory::Real, 8, IS_MAX, NumericCompare>(
969           intrinsic, result, x, kind, dim, mask, back, terminator);
970       return;
971 #if LONG_DOUBLE == 80
972     case 10:
973       DoPartialMaxOrMinLoc<TypeCategory::Real, 10, IS_MAX, NumericCompare>(
974           intrinsic, result, x, kind, dim, mask, back, terminator);
975       return;
976 #elif LONG_DOUBLE == 128
977     case 16:
978       DoPartialMaxOrMinLoc<TypeCategory::Real, 16, IS_MAX, NumericCompare>(
979           intrinsic, result, x, kind, dim, mask, back, terminator);
980       return;
981 #endif
982     }
983     break;
984   case TypeCategory::Character:
985     switch (catKind->second) {
986     case 1:
987       DoPartialMaxOrMinLoc<TypeCategory::Character, 1, IS_MAX,
988           CharacterCompare>(
989           intrinsic, result, x, kind, dim, mask, back, terminator);
990       return;
991     case 2:
992       DoPartialMaxOrMinLoc<TypeCategory::Character, 2, IS_MAX,
993           CharacterCompare>(
994           intrinsic, result, x, kind, dim, mask, back, terminator);
995       return;
996     case 4:
997       DoPartialMaxOrMinLoc<TypeCategory::Character, 4, IS_MAX,
998           CharacterCompare>(
999           intrinsic, result, x, kind, dim, mask, back, terminator);
1000       return;
1001     }
1002     break;
1003   default:
1004     break;
1005   }
1006   terminator.Crash(
1007       "%s: Bad data type code (%d) for array", intrinsic, x.type().raw());
1008 }
1009 
1010 extern "C" {
1011 void RTNAME(MaxlocDim)(Descriptor &result, const Descriptor &x, int kind,
1012     int dim, const char *source, int line, const Descriptor *mask, bool back) {
1013   TypedPartialMaxOrMinLoc<true>(
1014       "MAXLOC", result, x, kind, dim, source, line, mask, back);
1015 }
1016 void RTNAME(MinlocDim)(Descriptor &result, const Descriptor &x, int kind,
1017     int dim, const char *source, int line, const Descriptor *mask, bool back) {
1018   TypedPartialMaxOrMinLoc<false>(
1019       "MINLOC", result, x, kind, dim, source, line, mask, back);
1020 }
1021 } // extern "C"
1022 
1023 // MAXVAL and MINVAL
1024 
1025 template <TypeCategory CAT, int KIND, bool IS_MAXVAL> struct MaxOrMinIdentity {
1026   using Type = CppTypeFor<CAT, KIND>;
1027   static constexpr Type Value() {
1028     return IS_MAXVAL ? std::numeric_limits<Type>::lowest()
1029                      : std::numeric_limits<Type>::max();
1030   }
1031 };
1032 
1033 // std::numeric_limits<> may not know int128_t
1034 template <bool IS_MAXVAL>
1035 struct MaxOrMinIdentity<TypeCategory::Integer, 16, IS_MAXVAL> {
1036   using Type = CppTypeFor<TypeCategory::Integer, 16>;
1037   static constexpr Type Value() {
1038     return IS_MAXVAL ? Type{1} << 127 : ~Type{0} >> 1;
1039   }
1040 };
1041 
1042 template <TypeCategory CAT, int KIND, bool IS_MAXVAL>
1043 class NumericExtremumAccumulator {
1044 public:
1045   using Type = CppTypeFor<CAT, KIND>;
1046   NumericExtremumAccumulator(const Descriptor &array) : array_{array} {}
1047   template <typename A> void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
1048     *p = extremum_;
1049   }
1050   bool Accumulate(Type x) {
1051     if constexpr (IS_MAXVAL) {
1052       if (x > extremum_) {
1053         extremum_ = x;
1054       }
1055     } else if (x < extremum_) {
1056       extremum_ = x;
1057     }
1058     return true;
1059   }
1060   template <typename A> bool AccumulateAt(const SubscriptValue at[]) {
1061     return Accumulate(*array_.Element<A>(at));
1062   }
1063 
1064 private:
1065   const Descriptor &array_;
1066   Type extremum_{MaxOrMinIdentity<CAT, KIND, IS_MAXVAL>::Value()};
1067 };
1068 
1069 template <TypeCategory CAT, int KIND, bool IS_MAXVAL>
1070 inline CppTypeFor<CAT, KIND> TotalNumericMaxOrMin(const Descriptor &x,
1071     const char *source, int line, int dim, const Descriptor *mask,
1072     const char *intrinsic) {
1073   return GetTotalReduction<CAT, KIND>(x, source, line, dim, mask,
1074       NumericExtremumAccumulator<CAT, KIND, IS_MAXVAL>{x}, intrinsic);
1075 }
1076 
1077 template <TypeCategory CAT, int KIND, bool IS_MAXVAL,
1078     template <TypeCategory, int, bool> class ACCUMULATOR>
1079 static void DoMaxOrMin(Descriptor &result, const Descriptor &x, int dim,
1080     const Descriptor *mask, const char *intrinsic, Terminator &terminator) {
1081   using Type = CppTypeFor<CAT, KIND>;
1082   if (dim == 0 || x.rank() == 1) {
1083     // Total reduction
1084     result.Establish(x.type(), x.ElementBytes(), nullptr, 0, nullptr,
1085         CFI_attribute_allocatable);
1086     if (int stat{result.Allocate()}) {
1087       terminator.Crash(
1088           "%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
1089     }
1090     ACCUMULATOR<CAT, KIND, IS_MAXVAL> accumulator{x};
1091     DoTotalReduction<Type>(x, dim, mask, accumulator, intrinsic, terminator);
1092     accumulator.GetResult(result.OffsetElement<Type>());
1093   } else {
1094     // Partial reduction
1095     PartialReduction<ACCUMULATOR<CAT, KIND, IS_MAXVAL>, CAT, KIND>(
1096         result, x, dim, mask, terminator, intrinsic);
1097   }
1098 }
1099 
1100 template <bool IS_MAXVAL>
1101 inline void NumericMaxOrMin(Descriptor &result, const Descriptor &x, int dim,
1102     const char *source, int line, const Descriptor *mask,
1103     const char *intrinsic) {
1104   Terminator terminator{source, line};
1105   auto type{x.type().GetCategoryAndKind()};
1106   RUNTIME_CHECK(terminator, type);
1107   switch (type->first) {
1108   case TypeCategory::Integer:
1109     switch (type->second) {
1110     case 1:
1111       DoMaxOrMin<TypeCategory::Integer, 1, IS_MAXVAL,
1112           NumericExtremumAccumulator>(
1113           result, x, dim, mask, intrinsic, terminator);
1114       return;
1115     case 2:
1116       DoMaxOrMin<TypeCategory::Integer, 2, IS_MAXVAL,
1117           NumericExtremumAccumulator>(
1118           result, x, dim, mask, intrinsic, terminator);
1119       return;
1120     case 4:
1121       DoMaxOrMin<TypeCategory::Integer, 4, IS_MAXVAL,
1122           NumericExtremumAccumulator>(
1123           result, x, dim, mask, intrinsic, terminator);
1124       return;
1125     case 8:
1126       DoMaxOrMin<TypeCategory::Integer, 8, IS_MAXVAL,
1127           NumericExtremumAccumulator>(
1128           result, x, dim, mask, intrinsic, terminator);
1129       return;
1130 #ifdef __SIZEOF_INT128__
1131     case 16:
1132       DoMaxOrMin<TypeCategory::Integer, 16, IS_MAXVAL,
1133           NumericExtremumAccumulator>(
1134           result, x, dim, mask, intrinsic, terminator);
1135       return;
1136 #endif
1137     }
1138     break;
1139   case TypeCategory::Real:
1140     switch (type->second) {
1141     // TODO: REAL(2 & 3)
1142     case 4:
1143       DoMaxOrMin<TypeCategory::Real, 4, IS_MAXVAL, NumericExtremumAccumulator>(
1144           result, x, dim, mask, intrinsic, terminator);
1145       return;
1146     case 8:
1147       DoMaxOrMin<TypeCategory::Real, 8, IS_MAXVAL, NumericExtremumAccumulator>(
1148           result, x, dim, mask, intrinsic, terminator);
1149       return;
1150 #if LONG_DOUBLE == 80
1151     case 10:
1152       DoMaxOrMin<TypeCategory::Real, 10, IS_MAXVAL, NumericExtremumAccumulator>(
1153           result, x, dim, mask, intrinsic, terminator);
1154       return;
1155 #elif LONG_DOUBLE == 128
1156     case 16:
1157       DoMaxOrMin<TypeCategory::Real, 16, IS_MAXVAL, NumericExtremumAccumulator>(
1158           result, x, dim, mask, intrinsic, terminator);
1159       return;
1160 #endif
1161     }
1162     break;
1163   default:
1164     break;
1165   }
1166   terminator.Crash("%s: bad type code %d", intrinsic, x.type().raw());
1167 }
1168 
1169 template <TypeCategory, int KIND, bool IS_MAXVAL>
1170 class CharacterExtremumAccumulator {
1171 public:
1172   using Type = CppTypeFor<TypeCategory::Character, KIND>;
1173   CharacterExtremumAccumulator(const Descriptor &array)
1174       : array_{array}, charLen_{array_.ElementBytes() / KIND} {}
1175   template <typename A> void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
1176     static_assert(std::is_same_v<A, Type>);
1177     if (extremum_) {
1178       std::memcpy(p, extremum_, charLen_);
1179     } else {
1180       // empty array: result is all zero-valued characters
1181       std::memset(p, 0, charLen_);
1182     }
1183   }
1184   bool Accumulate(const Type *x) {
1185     if (!extremum_) {
1186       extremum_ = x;
1187     } else {
1188       int cmp{CharacterScalarCompare(x, extremum_, charLen_, charLen_)};
1189       if (IS_MAXVAL == (cmp > 0)) {
1190         extremum_ = x;
1191       }
1192     }
1193     return true;
1194   }
1195   template <typename A> bool AccumulateAt(const SubscriptValue at[]) {
1196     return Accumulate(array_.Element<A>(at));
1197   }
1198 
1199 private:
1200   const Descriptor &array_;
1201   std::size_t charLen_;
1202   const Type *extremum_{nullptr};
1203 };
1204 
1205 template <bool IS_MAXVAL>
1206 inline void CharacterMaxOrMin(Descriptor &result, const Descriptor &x, int dim,
1207     const char *source, int line, const Descriptor *mask,
1208     const char *intrinsic) {
1209   Terminator terminator{source, line};
1210   auto type{x.type().GetCategoryAndKind()};
1211   RUNTIME_CHECK(terminator, type && type->first == TypeCategory::Character);
1212   switch (type->second) {
1213   case 1:
1214     DoMaxOrMin<TypeCategory::Character, 1, IS_MAXVAL,
1215         CharacterExtremumAccumulator>(
1216         result, x, dim, mask, intrinsic, terminator);
1217     break;
1218   case 2:
1219     DoMaxOrMin<TypeCategory::Character, 2, IS_MAXVAL,
1220         CharacterExtremumAccumulator>(
1221         result, x, dim, mask, intrinsic, terminator);
1222     break;
1223   case 4:
1224     DoMaxOrMin<TypeCategory::Character, 4, IS_MAXVAL,
1225         CharacterExtremumAccumulator>(
1226         result, x, dim, mask, intrinsic, terminator);
1227     break;
1228   default:
1229     terminator.Crash("%s: bad character kind %d", intrinsic, type->second);
1230   }
1231 }
1232 
1233 extern "C" {
1234 CppTypeFor<TypeCategory::Integer, 1> RTNAME(MaxvalInteger1)(const Descriptor &x,
1235     const char *source, int line, int dim, const Descriptor *mask) {
1236   return TotalNumericMaxOrMin<TypeCategory::Integer, 1, true>(
1237       x, source, line, dim, mask, "MAXVAL");
1238 }
1239 CppTypeFor<TypeCategory::Integer, 2> RTNAME(MaxvalInteger2)(const Descriptor &x,
1240     const char *source, int line, int dim, const Descriptor *mask) {
1241   return TotalNumericMaxOrMin<TypeCategory::Integer, 2, true>(
1242       x, source, line, dim, mask, "MAXVAL");
1243 }
1244 CppTypeFor<TypeCategory::Integer, 4> RTNAME(MaxvalInteger4)(const Descriptor &x,
1245     const char *source, int line, int dim, const Descriptor *mask) {
1246   return TotalNumericMaxOrMin<TypeCategory::Integer, 4, true>(
1247       x, source, line, dim, mask, "MAXVAL");
1248 }
1249 CppTypeFor<TypeCategory::Integer, 8> RTNAME(MaxvalInteger8)(const Descriptor &x,
1250     const char *source, int line, int dim, const Descriptor *mask) {
1251   return TotalNumericMaxOrMin<TypeCategory::Integer, 8, true>(
1252       x, source, line, dim, mask, "MAXVAL");
1253 }
1254 #ifdef __SIZEOF_INT128__
1255 CppTypeFor<TypeCategory::Integer, 16> RTNAME(MaxvalInteger16)(
1256     const Descriptor &x, const char *source, int line, int dim,
1257     const Descriptor *mask) {
1258   return TotalNumericMaxOrMin<TypeCategory::Integer, 16, true>(
1259       x, source, line, dim, mask, "MAXVAL");
1260 }
1261 #endif
1262 
1263 // TODO: REAL(2 & 3)
1264 CppTypeFor<TypeCategory::Real, 4> RTNAME(MaxvalReal4)(const Descriptor &x,
1265     const char *source, int line, int dim, const Descriptor *mask) {
1266   return TotalNumericMaxOrMin<TypeCategory::Real, 4, true>(
1267       x, source, line, dim, mask, "MAXVAL");
1268 }
1269 CppTypeFor<TypeCategory::Real, 8> RTNAME(MaxvalReal8)(const Descriptor &x,
1270     const char *source, int line, int dim, const Descriptor *mask) {
1271   return TotalNumericMaxOrMin<TypeCategory::Real, 8, true>(
1272       x, source, line, dim, mask, "MAXVAL");
1273 }
1274 #if LONG_DOUBLE == 80
1275 CppTypeFor<TypeCategory::Real, 10> RTNAME(MaxvalReal10)(const Descriptor &x,
1276     const char *source, int line, int dim, const Descriptor *mask) {
1277   return TotalNumericMaxOrMin<TypeCategory::Real, 10, true>(
1278       x, source, line, dim, mask, "MAXVAL");
1279 }
1280 #elif LONG_DOUBLE == 128
1281 CppTypeFor<TypeCategory::Real, 16> RTNAME(MaxvalReal16)(const Descriptor &x,
1282     const char *source, int line, int dim, const Descriptor *mask) {
1283   return TotalNumericMaxOrMin<TypeCategory::Real, 16, true>(
1284       x, source, line, dim, mask, "MAXVAL");
1285 }
1286 #endif
1287 
1288 void RTNAME(MaxvalCharacter)(Descriptor &result, const Descriptor &x,
1289     const char *source, int line, const Descriptor *mask) {
1290   CharacterMaxOrMin<true>(result, x, 0, source, line, mask, "MAXVAL");
1291 }
1292 
1293 CppTypeFor<TypeCategory::Integer, 1> RTNAME(MinvalInteger1)(const Descriptor &x,
1294     const char *source, int line, int dim, const Descriptor *mask) {
1295   return TotalNumericMaxOrMin<TypeCategory::Integer, 1, false>(
1296       x, source, line, dim, mask, "MINVAL");
1297 }
1298 CppTypeFor<TypeCategory::Integer, 2> RTNAME(MinvalInteger2)(const Descriptor &x,
1299     const char *source, int line, int dim, const Descriptor *mask) {
1300   return TotalNumericMaxOrMin<TypeCategory::Integer, 2, false>(
1301       x, source, line, dim, mask, "MINVAL");
1302 }
1303 CppTypeFor<TypeCategory::Integer, 4> RTNAME(MinvalInteger4)(const Descriptor &x,
1304     const char *source, int line, int dim, const Descriptor *mask) {
1305   return TotalNumericMaxOrMin<TypeCategory::Integer, 4, false>(
1306       x, source, line, dim, mask, "MINVAL");
1307 }
1308 CppTypeFor<TypeCategory::Integer, 8> RTNAME(MinvalInteger8)(const Descriptor &x,
1309     const char *source, int line, int dim, const Descriptor *mask) {
1310   return TotalNumericMaxOrMin<TypeCategory::Integer, 8, false>(
1311       x, source, line, dim, mask, "MINVAL");
1312 }
1313 #ifdef __SIZEOF_INT128__
1314 CppTypeFor<TypeCategory::Integer, 16> RTNAME(MinvalInteger16)(
1315     const Descriptor &x, const char *source, int line, int dim,
1316     const Descriptor *mask) {
1317   return TotalNumericMaxOrMin<TypeCategory::Integer, 16, false>(
1318       x, source, line, dim, mask, "MINVAL");
1319 }
1320 #endif
1321 
1322 // TODO: REAL(2 & 3)
1323 CppTypeFor<TypeCategory::Real, 4> RTNAME(MinvalReal4)(const Descriptor &x,
1324     const char *source, int line, int dim, const Descriptor *mask) {
1325   return TotalNumericMaxOrMin<TypeCategory::Real, 4, false>(
1326       x, source, line, dim, mask, "MINVAL");
1327 }
1328 CppTypeFor<TypeCategory::Real, 8> RTNAME(MinvalReal8)(const Descriptor &x,
1329     const char *source, int line, int dim, const Descriptor *mask) {
1330   return TotalNumericMaxOrMin<TypeCategory::Real, 8, false>(
1331       x, source, line, dim, mask, "MINVAL");
1332 }
1333 #if LONG_DOUBLE == 80
1334 CppTypeFor<TypeCategory::Real, 10> RTNAME(MinvalReal10)(const Descriptor &x,
1335     const char *source, int line, int dim, const Descriptor *mask) {
1336   return TotalNumericMaxOrMin<TypeCategory::Real, 10, false>(
1337       x, source, line, dim, mask, "MINVAL");
1338 }
1339 #elif LONG_DOUBLE == 128
1340 CppTypeFor<TypeCategory::Real, 16> RTNAME(MinvalReal16)(const Descriptor &x,
1341     const char *source, int line, int dim, const Descriptor *mask) {
1342   return TotalNumericMaxOrMin<TypeCategory::Real, 16, false>(
1343       x, source, line, dim, mask, "MINVAL");
1344 }
1345 #endif
1346 
1347 void RTNAME(MinvalCharacter)(Descriptor &result, const Descriptor &x,
1348     const char *source, int line, const Descriptor *mask) {
1349   CharacterMaxOrMin<false>(result, x, 0, source, line, mask, "MINVAL");
1350 }
1351 
1352 void RTNAME(MaxvalDim)(Descriptor &result, const Descriptor &x, int dim,
1353     const char *source, int line, const Descriptor *mask) {
1354   if (x.type().IsCharacter()) {
1355     CharacterMaxOrMin<true>(result, x, dim, source, line, mask, "MAXVAL");
1356   } else {
1357     NumericMaxOrMin<true>(result, x, dim, source, line, mask, "MAXVAL");
1358   }
1359 }
1360 void RTNAME(MinvalDim)(Descriptor &result, const Descriptor &x, int dim,
1361     const char *source, int line, const Descriptor *mask) {
1362   if (x.type().IsCharacter()) {
1363     CharacterMaxOrMin<false>(result, x, dim, source, line, mask, "MINVAL");
1364   } else {
1365     NumericMaxOrMin<false>(result, x, dim, source, line, mask, "MINVAL");
1366   }
1367 }
1368 
1369 } // extern "C"
1370 
1371 // ALL, ANY, & COUNT
1372 
1373 template <bool IS_ALL> class LogicalAccumulator {
1374 public:
1375   using Type = bool;
1376   explicit LogicalAccumulator(const Descriptor &array) : array_{array} {}
1377   bool Result() const { return result_; }
1378   bool Accumulate(bool x) {
1379     if (x == IS_ALL) {
1380       return true;
1381     } else {
1382       result_ = x;
1383       return false;
1384     }
1385   }
1386   template <typename IGNORED = void>
1387   bool AccumulateAt(const SubscriptValue at[]) {
1388     return Accumulate(IsLogicalElementTrue(array_, at));
1389   }
1390 
1391 private:
1392   const Descriptor &array_;
1393   bool result_{IS_ALL};
1394 };
1395 
1396 template <typename ACCUMULATOR>
1397 inline auto GetTotalLogicalReduction(const Descriptor &x, const char *source,
1398     int line, int dim, ACCUMULATOR &&accumulator, const char *intrinsic) ->
1399     typename ACCUMULATOR::Type {
1400   Terminator terminator{source, line};
1401   if (dim < 0 || dim > 1) {
1402     terminator.Crash("%s: bad DIM=%d", intrinsic, dim);
1403   }
1404   SubscriptValue xAt[maxRank];
1405   x.GetLowerBounds(xAt);
1406   for (auto elements{x.Elements()}; elements--; x.IncrementSubscripts(xAt)) {
1407     if (!accumulator.AccumulateAt(xAt)) {
1408       break; // cut short, result is known
1409     }
1410   }
1411   return accumulator.Result();
1412 }
1413 
1414 template <typename ACCUMULATOR>
1415 inline auto ReduceLogicalDimToScalar(const Descriptor &x, int zeroBasedDim,
1416     SubscriptValue subscripts[]) -> typename ACCUMULATOR::Type {
1417   ACCUMULATOR accumulator{x};
1418   SubscriptValue xAt[maxRank];
1419   GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
1420   const auto &dim{x.GetDimension(zeroBasedDim)};
1421   SubscriptValue at{dim.LowerBound()};
1422   for (auto n{dim.Extent()}; n-- > 0; ++at) {
1423     xAt[zeroBasedDim] = at;
1424     if (!accumulator.AccumulateAt(xAt)) {
1425       break;
1426     }
1427   }
1428   return accumulator.Result();
1429 }
1430 
1431 template <bool IS_ALL, int KIND>
1432 inline void ReduceLogicalDimension(Descriptor &result, const Descriptor &x,
1433     int dim, Terminator &terminator, const char *intrinsic) {
1434   // Standard requires result to have same LOGICAL kind as argument.
1435   CreatePartialReductionResult(result, x, dim, terminator, intrinsic, x.type());
1436   SubscriptValue at[maxRank];
1437   result.GetLowerBounds(at);
1438   INTERNAL_CHECK(at[0] == 1);
1439   using CppType = CppTypeFor<TypeCategory::Logical, KIND>;
1440   for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
1441     *result.Element<CppType>(at) =
1442         ReduceLogicalDimToScalar<LogicalAccumulator<IS_ALL>>(x, dim - 1, at);
1443   }
1444 }
1445 
1446 template <bool IS_ALL>
1447 inline void DoReduceLogicalDimension(Descriptor &result, const Descriptor &x,
1448     int dim, Terminator &terminator, const char *intrinsic) {
1449   auto catKind{x.type().GetCategoryAndKind()};
1450   RUNTIME_CHECK(terminator, catKind && catKind->first == TypeCategory::Logical);
1451   switch (catKind->second) {
1452   case 1:
1453     ReduceLogicalDimension<IS_ALL, 1>(result, x, dim, terminator, intrinsic);
1454     break;
1455   case 2:
1456     ReduceLogicalDimension<IS_ALL, 2>(result, x, dim, terminator, intrinsic);
1457     break;
1458   case 4:
1459     ReduceLogicalDimension<IS_ALL, 4>(result, x, dim, terminator, intrinsic);
1460     break;
1461   case 8:
1462     ReduceLogicalDimension<IS_ALL, 8>(result, x, dim, terminator, intrinsic);
1463     break;
1464   default:
1465     terminator.Crash(
1466         "%s: bad argument type LOGICAL(%d)", intrinsic, catKind->second);
1467   }
1468 }
1469 
1470 // COUNT
1471 
1472 class CountAccumulator {
1473 public:
1474   using Type = std::int64_t;
1475   explicit CountAccumulator(const Descriptor &array) : array_{array} {}
1476   Type Result() const { return result_; }
1477   template <typename IGNORED = void>
1478   bool AccumulateAt(const SubscriptValue at[]) {
1479     if (IsLogicalElementTrue(array_, at)) {
1480       ++result_;
1481     }
1482     return true;
1483   }
1484 
1485 private:
1486   const Descriptor &array_;
1487   Type result_{0};
1488 };
1489 
1490 template <int KIND>
1491 inline void CountDimension(
1492     Descriptor &result, const Descriptor &x, int dim, Terminator &terminator) {
1493   CreatePartialReductionResult(result, x, dim, terminator, "COUNT",
1494       TypeCode{TypeCategory::Integer, KIND});
1495   SubscriptValue at[maxRank];
1496   result.GetLowerBounds(at);
1497   INTERNAL_CHECK(at[0] == 1);
1498   using CppType = CppTypeFor<TypeCategory::Integer, KIND>;
1499   for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
1500     *result.Element<CppType>(at) =
1501         ReduceLogicalDimToScalar<CountAccumulator>(x, dim - 1, at);
1502   }
1503 }
1504 
1505 extern "C" {
1506 
1507 bool RTNAME(All)(const Descriptor &x, const char *source, int line, int dim) {
1508   return GetTotalLogicalReduction(
1509       x, source, line, dim, LogicalAccumulator<true>{x}, "ALL");
1510 }
1511 void RTNAME(AllDim)(Descriptor &result, const Descriptor &x, int dim,
1512     const char *source, int line) {
1513   Terminator terminator{source, line};
1514   DoReduceLogicalDimension<true>(result, x, dim, terminator, "ALL");
1515 }
1516 
1517 bool RTNAME(Any)(const Descriptor &x, const char *source, int line, int dim) {
1518   return GetTotalLogicalReduction(
1519       x, source, line, dim, LogicalAccumulator<false>{x}, "ANY");
1520 }
1521 void RTNAME(AnyDim)(Descriptor &result, const Descriptor &x, int dim,
1522     const char *source, int line) {
1523   Terminator terminator{source, line};
1524   DoReduceLogicalDimension<false>(result, x, dim, terminator, "ANY");
1525 }
1526 
1527 std::int64_t RTNAME(Count)(
1528     const Descriptor &x, const char *source, int line, int dim) {
1529   return GetTotalLogicalReduction(
1530       x, source, line, dim, CountAccumulator{x}, "COUNT");
1531 }
1532 void RTNAME(CountDim)(Descriptor &result, const Descriptor &x, int dim,
1533     int kind, const char *source, int line) {
1534   Terminator terminator{source, line};
1535   switch (kind) {
1536   case 1:
1537     CountDimension<1>(result, x, dim, terminator);
1538     break;
1539   case 2:
1540     CountDimension<2>(result, x, dim, terminator);
1541     break;
1542   case 4:
1543     CountDimension<4>(result, x, dim, terminator);
1544     break;
1545   case 8:
1546     CountDimension<8>(result, x, dim, terminator);
1547     break;
1548 #ifdef __SIZEOF_INT128__
1549   case 16:
1550     CountDimension<16>(result, x, dim, terminator);
1551     break;
1552 #endif
1553   default:
1554     terminator.Crash("COUNT: bad KIND=%d", kind);
1555   }
1556 }
1557 
1558 } // extern "C"
1559 } // namespace Fortran::runtime
1560