1 //===-- Character.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "flang/Optimizer/Builder/Character.h"
14 #include "flang/Lower/Todo.h"
15 #include "flang/Optimizer/Builder/DoLoopHelper.h"
16 #include "flang/Optimizer/Builder/FIRBuilder.h"
17 #include "llvm/Support/Debug.h"
18 #include <optional>
19 
20 #define DEBUG_TYPE "flang-lower-character"
21 
22 //===----------------------------------------------------------------------===//
23 // CharacterExprHelper implementation
24 //===----------------------------------------------------------------------===//
25 
26 /// Unwrap all the ref and box types and return the inner element type.
27 static mlir::Type unwrapBoxAndRef(mlir::Type type) {
28   if (auto boxType = type.dyn_cast<fir::BoxCharType>())
29     return boxType.getEleTy();
30   while (true) {
31     type = fir::unwrapRefType(type);
32     if (auto boxTy = type.dyn_cast<fir::BoxType>())
33       type = boxTy.getEleTy();
34     else
35       break;
36   }
37   return type;
38 }
39 
40 /// Unwrap base fir.char<kind,len> type.
41 static fir::CharacterType recoverCharacterType(mlir::Type type) {
42   type = fir::unwrapSequenceType(unwrapBoxAndRef(type));
43   if (auto charTy = type.dyn_cast<fir::CharacterType>())
44     return charTy;
45   llvm::report_fatal_error("expected a character type");
46 }
47 
48 bool fir::factory::CharacterExprHelper::isCharacterScalar(mlir::Type type) {
49   type = unwrapBoxAndRef(type);
50   return !type.isa<fir::SequenceType>() && fir::isa_char(type);
51 }
52 
53 bool fir::factory::CharacterExprHelper::isArray(mlir::Type type) {
54   type = unwrapBoxAndRef(type);
55   if (auto seqTy = type.dyn_cast<fir::SequenceType>())
56     return fir::isa_char(seqTy.getEleTy());
57   return false;
58 }
59 
60 fir::CharacterType
61 fir::factory::CharacterExprHelper::getCharacterType(mlir::Type type) {
62   assert(isCharacterScalar(type) && "expected scalar character");
63   return recoverCharacterType(type);
64 }
65 
66 fir::CharacterType
67 fir::factory::CharacterExprHelper::getCharType(mlir::Type type) {
68   return recoverCharacterType(type);
69 }
70 
71 fir::CharacterType fir::factory::CharacterExprHelper::getCharacterType(
72     const fir::CharBoxValue &box) {
73   return getCharacterType(box.getBuffer().getType());
74 }
75 
76 fir::CharacterType
77 fir::factory::CharacterExprHelper::getCharacterType(mlir::Value str) {
78   return getCharacterType(str.getType());
79 }
80 
81 /// Determine the static size of the character. Returns the computed size, not
82 /// an IR Value.
83 static std::optional<fir::CharacterType::LenType>
84 getCompileTimeLength(const fir::CharBoxValue &box) {
85   auto len = recoverCharacterType(box.getBuffer().getType()).getLen();
86   if (len == fir::CharacterType::unknownLen())
87     return {};
88   return len;
89 }
90 
91 /// Detect the precondition that the value `str` does not reside in memory. Such
92 /// values will have a type `!fir.array<...x!fir.char<N>>` or `!fir.char<N>`.
93 LLVM_ATTRIBUTE_UNUSED static bool needToMaterialize(mlir::Value str) {
94   return str.getType().isa<fir::SequenceType>() || fir::isa_char(str.getType());
95 }
96 
97 /// Unwrap integer constant from mlir::Value.
98 static llvm::Optional<std::int64_t> getIntIfConstant(mlir::Value value) {
99   if (auto *definingOp = value.getDefiningOp())
100     if (auto cst = mlir::dyn_cast<mlir::arith::ConstantOp>(definingOp))
101       if (auto intAttr = cst.getValue().dyn_cast<mlir::IntegerAttr>())
102         return intAttr.getInt();
103   return {};
104 }
105 
106 /// This is called only if `str` does not reside in memory. Such a bare string
107 /// value will be converted into a memory-based temporary and an extended
108 /// boxchar value returned.
109 fir::CharBoxValue
110 fir::factory::CharacterExprHelper::materializeValue(mlir::Value str) {
111   assert(needToMaterialize(str));
112   auto ty = str.getType();
113   assert(isCharacterScalar(ty) && "expected scalar character");
114   auto charTy = ty.dyn_cast<fir::CharacterType>();
115   if (!charTy || charTy.getLen() == fir::CharacterType::unknownLen()) {
116     LLVM_DEBUG(llvm::dbgs() << "cannot materialize: " << str << '\n');
117     llvm_unreachable("must be a !fir.char<N> type");
118   }
119   auto len = builder.createIntegerConstant(
120       loc, builder.getCharacterLengthType(), charTy.getLen());
121   auto temp = builder.create<fir::AllocaOp>(loc, charTy);
122   builder.create<fir::StoreOp>(loc, str, temp);
123   LLVM_DEBUG(llvm::dbgs() << "materialized as local: " << str << " -> (" << temp
124                           << ", " << len << ")\n");
125   return {temp, len};
126 }
127 
128 fir::ExtendedValue
129 fir::factory::CharacterExprHelper::toExtendedValue(mlir::Value character,
130                                                    mlir::Value len) {
131   auto lenType = builder.getCharacterLengthType();
132   auto type = character.getType();
133   auto base = fir::isa_passbyref_type(type) ? character : mlir::Value{};
134   auto resultLen = len;
135   llvm::SmallVector<mlir::Value> extents;
136 
137   if (auto eleType = fir::dyn_cast_ptrEleTy(type))
138     type = eleType;
139 
140   if (auto arrayType = type.dyn_cast<fir::SequenceType>()) {
141     type = arrayType.getEleTy();
142     auto indexType = builder.getIndexType();
143     for (auto extent : arrayType.getShape()) {
144       if (extent == fir::SequenceType::getUnknownExtent())
145         break;
146       extents.emplace_back(
147           builder.createIntegerConstant(loc, indexType, extent));
148     }
149     // Last extent might be missing in case of assumed-size. If more extents
150     // could not be deduced from type, that's an error (a fir.box should
151     // have been used in the interface).
152     if (extents.size() + 1 < arrayType.getShape().size())
153       mlir::emitError(loc, "cannot retrieve array extents from type");
154   }
155 
156   if (auto charTy = type.dyn_cast<fir::CharacterType>()) {
157     if (!resultLen && charTy.getLen() != fir::CharacterType::unknownLen())
158       resultLen = builder.createIntegerConstant(loc, lenType, charTy.getLen());
159   } else if (auto boxCharType = type.dyn_cast<fir::BoxCharType>()) {
160     auto refType = builder.getRefType(boxCharType.getEleTy());
161     // If the embox is accessible, use its operand to avoid filling
162     // the generated fir with embox/unbox.
163     mlir::Value boxCharLen;
164     if (auto definingOp = character.getDefiningOp()) {
165       if (auto box = mlir::dyn_cast<fir::EmboxCharOp>(definingOp)) {
166         base = box.getMemref();
167         boxCharLen = box.getLen();
168       }
169     }
170     if (!boxCharLen) {
171       auto unboxed =
172           builder.create<fir::UnboxCharOp>(loc, refType, lenType, character);
173       base = builder.createConvert(loc, refType, unboxed.getResult(0));
174       boxCharLen = unboxed.getResult(1);
175     }
176     if (!resultLen) {
177       resultLen = boxCharLen;
178     }
179   } else if (type.isa<fir::BoxType>()) {
180     mlir::emitError(loc, "descriptor or derived type not yet handled");
181   } else {
182     llvm_unreachable("Cannot translate mlir::Value to character ExtendedValue");
183   }
184 
185   if (!base) {
186     if (auto load =
187             mlir::dyn_cast_or_null<fir::LoadOp>(character.getDefiningOp())) {
188       base = load.getOperand();
189     } else {
190       return materializeValue(fir::getBase(character));
191     }
192   }
193   if (!resultLen)
194     llvm::report_fatal_error("no dynamic length found for character");
195   if (!extents.empty())
196     return fir::CharArrayBoxValue{base, resultLen, extents};
197   return fir::CharBoxValue{base, resultLen};
198 }
199 
200 static mlir::Type getSingletonCharType(mlir::MLIRContext *ctxt, int kind) {
201   return fir::CharacterType::getSingleton(ctxt, kind);
202 }
203 
204 mlir::Value
205 fir::factory::CharacterExprHelper::createEmbox(const fir::CharBoxValue &box) {
206   // Base CharBoxValue of CharArrayBoxValue are ok here (do not require a scalar
207   // type)
208   auto charTy = recoverCharacterType(box.getBuffer().getType());
209   auto boxCharType =
210       fir::BoxCharType::get(builder.getContext(), charTy.getFKind());
211   auto refType = fir::ReferenceType::get(boxCharType.getEleTy());
212   mlir::Value buff = box.getBuffer();
213   // fir.boxchar requires a memory reference. Allocate temp if the character is
214   // not in memory.
215   if (!fir::isa_ref_type(buff.getType())) {
216     auto temp = builder.createTemporary(loc, buff.getType());
217     builder.create<fir::StoreOp>(loc, buff, temp);
218     buff = temp;
219   }
220   buff = builder.createConvert(loc, refType, buff);
221   // Convert in case the provided length is not of the integer type that must
222   // be used in boxchar.
223   auto len = builder.createConvert(loc, builder.getCharacterLengthType(),
224                                    box.getLen());
225   return builder.create<fir::EmboxCharOp>(loc, boxCharType, buff, len);
226 }
227 
228 fir::CharBoxValue fir::factory::CharacterExprHelper::toScalarCharacter(
229     const fir::CharArrayBoxValue &box) {
230   if (box.getBuffer().getType().isa<fir::PointerType>())
231     TODO(loc, "concatenating non contiguous character array into a scalar");
232 
233   // TODO: add a fast path multiplying new length at compile time if the info is
234   // in the array type.
235   auto lenType = builder.getCharacterLengthType();
236   auto len = builder.createConvert(loc, lenType, box.getLen());
237   for (auto extent : box.getExtents())
238     len = builder.create<mlir::arith::MulIOp>(
239         loc, len, builder.createConvert(loc, lenType, extent));
240 
241   // TODO: typeLen can be improved in compiled constant cases
242   // TODO: allow bare fir.array<> (no ref) conversion here ?
243   auto typeLen = fir::CharacterType::unknownLen();
244   auto kind = recoverCharacterType(box.getBuffer().getType()).getFKind();
245   auto charTy = fir::CharacterType::get(builder.getContext(), kind, typeLen);
246   auto type = fir::ReferenceType::get(charTy);
247   auto buffer = builder.createConvert(loc, type, box.getBuffer());
248   return {buffer, len};
249 }
250 
251 mlir::Value fir::factory::CharacterExprHelper::createEmbox(
252     const fir::CharArrayBoxValue &box) {
253   // Use same embox as for scalar. It's losing the actual data size information
254   // (We do not multiply the length by the array size), but that is what Fortran
255   // call interfaces using boxchar expect.
256   return createEmbox(static_cast<const fir::CharBoxValue &>(box));
257 }
258 
259 /// Get the address of the element at position \p index of the scalar character
260 /// \p buffer.
261 /// \p buffer must be of type !fir.ref<fir.char<k, len>>. The length may be
262 /// unknown. \p index must have any integer type, and is zero based. The return
263 /// value is a singleton address (!fir.ref<!fir.char<kind>>)
264 mlir::Value
265 fir::factory::CharacterExprHelper::createElementAddr(mlir::Value buffer,
266                                                      mlir::Value index) {
267   // The only way to address an element of a fir.ref<char<kind, len>> is to cast
268   // it to a fir.array<len x fir.char<kind>> and use fir.coordinate_of.
269   auto bufferType = buffer.getType();
270   assert(fir::isa_ref_type(bufferType));
271   assert(isCharacterScalar(bufferType));
272   auto charTy = recoverCharacterType(bufferType);
273   auto singleTy = getSingletonCharType(builder.getContext(), charTy.getFKind());
274   auto singleRefTy = builder.getRefType(singleTy);
275   auto extent = fir::SequenceType::getUnknownExtent();
276   if (charTy.getLen() != fir::CharacterType::unknownLen())
277     extent = charTy.getLen();
278   auto coorTy = builder.getRefType(fir::SequenceType::get({extent}, singleTy));
279 
280   auto coor = builder.createConvert(loc, coorTy, buffer);
281   auto i = builder.createConvert(loc, builder.getIndexType(), index);
282   return builder.create<fir::CoordinateOp>(loc, singleRefTy, coor, i);
283 }
284 
285 /// Load a character out of `buff` from offset `index`.
286 /// `buff` must be a reference to memory.
287 mlir::Value
288 fir::factory::CharacterExprHelper::createLoadCharAt(mlir::Value buff,
289                                                     mlir::Value index) {
290   LLVM_DEBUG(llvm::dbgs() << "load a char: " << buff << " type: "
291                           << buff.getType() << " at: " << index << '\n');
292   return builder.create<fir::LoadOp>(loc, createElementAddr(buff, index));
293 }
294 
295 /// Store the singleton character `c` to `str` at offset `index`.
296 /// `str` must be a reference to memory.
297 void fir::factory::CharacterExprHelper::createStoreCharAt(mlir::Value str,
298                                                           mlir::Value index,
299                                                           mlir::Value c) {
300   LLVM_DEBUG(llvm::dbgs() << "store the char: " << c << " into: " << str
301                           << " type: " << str.getType() << " at: " << index
302                           << '\n');
303   auto addr = createElementAddr(str, index);
304   builder.create<fir::StoreOp>(loc, c, addr);
305 }
306 
307 // FIXME: this temp is useless... either fir.coordinate_of needs to
308 // work on "loaded" characters (!fir.array<len x fir.char<kind>>) or
309 // character should never be loaded.
310 // If this is a fir.array<>, allocate and store the value so that
311 // fir.cooridnate_of can be use on the value.
312 mlir::Value fir::factory::CharacterExprHelper::getCharBoxBuffer(
313     const fir::CharBoxValue &box) {
314   auto buff = box.getBuffer();
315   if (fir::isa_char(buff.getType())) {
316     auto newBuff = builder.create<fir::AllocaOp>(loc, buff.getType());
317     builder.create<fir::StoreOp>(loc, buff, newBuff);
318     return newBuff;
319   }
320   return buff;
321 }
322 
323 /// Create a loop to copy `count` characters from `src` to `dest`. Note that the
324 /// KIND indicates the number of bits in a code point. (ASCII, UCS-2, or UCS-4.)
325 void fir::factory::CharacterExprHelper::createCopy(
326     const fir::CharBoxValue &dest, const fir::CharBoxValue &src,
327     mlir::Value count) {
328   auto fromBuff = getCharBoxBuffer(src);
329   auto toBuff = getCharBoxBuffer(dest);
330   LLVM_DEBUG(llvm::dbgs() << "create char copy from: "; src.dump();
331              llvm::dbgs() << " to: "; dest.dump();
332              llvm::dbgs() << " count: " << count << '\n');
333   auto kind = getCharacterKind(src.getBuffer().getType());
334   // If the src and dest are the same KIND, then use memmove to move the bits.
335   // We don't have to worry about overlapping ranges with memmove.
336   if (getCharacterKind(dest.getBuffer().getType()) == kind) {
337     auto bytes = builder.getKindMap().getCharacterBitsize(kind) / 8;
338     auto i64Ty = builder.getI64Type();
339     auto kindBytes = builder.createIntegerConstant(loc, i64Ty, bytes);
340     auto castCount = builder.createConvert(loc, i64Ty, count);
341     auto totalBytes =
342         builder.create<mlir::arith::MulIOp>(loc, kindBytes, castCount);
343     auto notVolatile = builder.createBool(loc, false);
344     auto memmv = getLlvmMemmove(builder);
345     auto argTys = memmv.getFunctionType().getInputs();
346     auto toPtr = builder.createConvert(loc, argTys[0], toBuff);
347     auto fromPtr = builder.createConvert(loc, argTys[1], fromBuff);
348     builder.create<fir::CallOp>(
349         loc, memmv, mlir::ValueRange{toPtr, fromPtr, totalBytes, notVolatile});
350     return;
351   }
352 
353   // Convert a CHARACTER of one KIND into a CHARACTER of another KIND.
354   builder.create<fir::CharConvertOp>(loc, src.getBuffer(), count,
355                                      dest.getBuffer());
356 }
357 
358 void fir::factory::CharacterExprHelper::createPadding(
359     const fir::CharBoxValue &str, mlir::Value lower, mlir::Value upper) {
360   auto blank = createBlankConstant(getCharacterType(str));
361   // Always create the loop, if upper < lower, no iteration will be
362   // executed.
363   auto toBuff = getCharBoxBuffer(str);
364   fir::factory::DoLoopHelper{builder, loc}.createLoop(
365       lower, upper, [&](fir::FirOpBuilder &, mlir::Value index) {
366         createStoreCharAt(toBuff, index, blank);
367       });
368 }
369 
370 fir::CharBoxValue
371 fir::factory::CharacterExprHelper::createCharacterTemp(mlir::Type type,
372                                                        mlir::Value len) {
373   auto kind = recoverCharacterType(type).getFKind();
374   auto typeLen = fir::CharacterType::unknownLen();
375   // If len is a constant, reflect the length in the type.
376   if (auto cstLen = getIntIfConstant(len))
377     typeLen = *cstLen;
378   auto *ctxt = builder.getContext();
379   auto charTy = fir::CharacterType::get(ctxt, kind, typeLen);
380   llvm::SmallVector<mlir::Value> lenParams;
381   if (typeLen == fir::CharacterType::unknownLen())
382     lenParams.push_back(len);
383   auto ref = builder.allocateLocal(loc, charTy, "", ".chrtmp",
384                                    /*shape=*/llvm::None, lenParams);
385   return {ref, len};
386 }
387 
388 fir::CharBoxValue fir::factory::CharacterExprHelper::createTempFrom(
389     const fir::ExtendedValue &source) {
390   const auto *charBox = source.getCharBox();
391   if (!charBox)
392     fir::emitFatalError(loc, "source must be a fir::CharBoxValue");
393   auto len = charBox->getLen();
394   auto sourceTy = charBox->getBuffer().getType();
395   auto temp = createCharacterTemp(sourceTy, len);
396   if (fir::isa_ref_type(sourceTy)) {
397     createCopy(temp, *charBox, len);
398   } else {
399     auto ref = builder.createConvert(loc, builder.getRefType(sourceTy),
400                                      temp.getBuffer());
401     builder.create<fir::StoreOp>(loc, charBox->getBuffer(), ref);
402   }
403   return temp;
404 }
405 
406 // Simple length one character assignment without loops.
407 void fir::factory::CharacterExprHelper::createLengthOneAssign(
408     const fir::CharBoxValue &lhs, const fir::CharBoxValue &rhs) {
409   auto addr = lhs.getBuffer();
410   auto toTy = fir::unwrapRefType(addr.getType());
411   mlir::Value val = rhs.getBuffer();
412   if (fir::isa_ref_type(val.getType()))
413     val = builder.create<fir::LoadOp>(loc, val);
414   val = builder.createConvert(loc, toTy, val);
415   builder.create<fir::StoreOp>(loc, val, addr);
416 }
417 
418 /// Returns the minimum of integer mlir::Value \p a and \b.
419 mlir::Value genMin(fir::FirOpBuilder &builder, mlir::Location loc,
420                    mlir::Value a, mlir::Value b) {
421   auto cmp = builder.create<mlir::arith::CmpIOp>(
422       loc, mlir::arith::CmpIPredicate::slt, a, b);
423   return builder.create<mlir::arith::SelectOp>(loc, cmp, a, b);
424 }
425 
426 void fir::factory::CharacterExprHelper::createAssign(
427     const fir::CharBoxValue &lhs, const fir::CharBoxValue &rhs) {
428   auto rhsCstLen = getCompileTimeLength(rhs);
429   auto lhsCstLen = getCompileTimeLength(lhs);
430   bool compileTimeSameLength =
431       lhsCstLen && rhsCstLen && *lhsCstLen == *rhsCstLen;
432 
433   if (compileTimeSameLength && *lhsCstLen == 1) {
434     createLengthOneAssign(lhs, rhs);
435     return;
436   }
437 
438   // Copy the minimum of the lhs and rhs lengths and pad the lhs remainder
439   // if needed.
440   auto copyCount = lhs.getLen();
441   auto idxTy = builder.getIndexType();
442   if (!compileTimeSameLength) {
443     auto lhsLen = builder.createConvert(loc, idxTy, lhs.getLen());
444     auto rhsLen = builder.createConvert(loc, idxTy, rhs.getLen());
445     copyCount = genMin(builder, loc, lhsLen, rhsLen);
446   }
447 
448   // Actual copy
449   createCopy(lhs, rhs, copyCount);
450 
451   // Pad if needed.
452   if (!compileTimeSameLength) {
453     auto one = builder.createIntegerConstant(loc, lhs.getLen().getType(), 1);
454     auto maxPadding =
455         builder.create<mlir::arith::SubIOp>(loc, lhs.getLen(), one);
456     createPadding(lhs, copyCount, maxPadding);
457   }
458 }
459 
460 fir::CharBoxValue fir::factory::CharacterExprHelper::createConcatenate(
461     const fir::CharBoxValue &lhs, const fir::CharBoxValue &rhs) {
462   auto lhsLen = builder.createConvert(loc, builder.getCharacterLengthType(),
463                                       lhs.getLen());
464   auto rhsLen = builder.createConvert(loc, builder.getCharacterLengthType(),
465                                       rhs.getLen());
466   mlir::Value len = builder.create<mlir::arith::AddIOp>(loc, lhsLen, rhsLen);
467   auto temp = createCharacterTemp(getCharacterType(rhs), len);
468   createCopy(temp, lhs, lhsLen);
469   auto one = builder.createIntegerConstant(loc, len.getType(), 1);
470   auto upperBound = builder.create<mlir::arith::SubIOp>(loc, len, one);
471   auto lhsLenIdx = builder.createConvert(loc, builder.getIndexType(), lhsLen);
472   auto fromBuff = getCharBoxBuffer(rhs);
473   auto toBuff = getCharBoxBuffer(temp);
474   fir::factory::DoLoopHelper{builder, loc}.createLoop(
475       lhsLenIdx, upperBound, one,
476       [&](fir::FirOpBuilder &bldr, mlir::Value index) {
477         auto rhsIndex = bldr.create<mlir::arith::SubIOp>(loc, index, lhsLenIdx);
478         auto charVal = createLoadCharAt(fromBuff, rhsIndex);
479         createStoreCharAt(toBuff, index, charVal);
480       });
481   return temp;
482 }
483 
484 fir::CharBoxValue fir::factory::CharacterExprHelper::createSubstring(
485     const fir::CharBoxValue &box, llvm::ArrayRef<mlir::Value> bounds) {
486   // Constant need to be materialize in memory to use fir.coordinate_of.
487   auto nbounds = bounds.size();
488   if (nbounds < 1 || nbounds > 2) {
489     mlir::emitError(loc, "Incorrect number of bounds in substring");
490     return {mlir::Value{}, mlir::Value{}};
491   }
492   mlir::SmallVector<mlir::Value> castBounds;
493   // Convert bounds to length type to do safe arithmetic on it.
494   for (auto bound : bounds)
495     castBounds.push_back(
496         builder.createConvert(loc, builder.getCharacterLengthType(), bound));
497   auto lowerBound = castBounds[0];
498   // FIR CoordinateOp is zero based but Fortran substring are one based.
499   auto one = builder.createIntegerConstant(loc, lowerBound.getType(), 1);
500   auto offset =
501       builder.create<mlir::arith::SubIOp>(loc, lowerBound, one).getResult();
502   auto addr = createElementAddr(box.getBuffer(), offset);
503   auto kind = getCharacterKind(box.getBuffer().getType());
504   auto charTy = fir::CharacterType::getUnknownLen(builder.getContext(), kind);
505   auto resultType = builder.getRefType(charTy);
506   auto substringRef = builder.createConvert(loc, resultType, addr);
507 
508   // Compute the length.
509   mlir::Value substringLen;
510   if (nbounds < 2) {
511     substringLen =
512         builder.create<mlir::arith::SubIOp>(loc, box.getLen(), castBounds[0]);
513   } else {
514     substringLen =
515         builder.create<mlir::arith::SubIOp>(loc, castBounds[1], castBounds[0]);
516   }
517   substringLen = builder.create<mlir::arith::AddIOp>(loc, substringLen, one);
518 
519   // Set length to zero if bounds were reversed (Fortran 2018 9.4.1)
520   auto zero = builder.createIntegerConstant(loc, substringLen.getType(), 0);
521   auto cdt = builder.create<mlir::arith::CmpIOp>(
522       loc, mlir::arith::CmpIPredicate::slt, substringLen, zero);
523   substringLen =
524       builder.create<mlir::arith::SelectOp>(loc, cdt, zero, substringLen);
525 
526   return {substringRef, substringLen};
527 }
528 
529 mlir::Value
530 fir::factory::CharacterExprHelper::createLenTrim(const fir::CharBoxValue &str) {
531   // Note: Runtime for LEN_TRIM should also be available at some
532   // point. For now use an inlined implementation.
533   auto indexType = builder.getIndexType();
534   auto len = builder.createConvert(loc, indexType, str.getLen());
535   auto one = builder.createIntegerConstant(loc, indexType, 1);
536   auto minusOne = builder.createIntegerConstant(loc, indexType, -1);
537   auto zero = builder.createIntegerConstant(loc, indexType, 0);
538   auto trueVal = builder.createIntegerConstant(loc, builder.getI1Type(), 1);
539   auto blank = createBlankConstantCode(getCharacterType(str));
540   mlir::Value lastChar = builder.create<mlir::arith::SubIOp>(loc, len, one);
541 
542   auto iterWhile =
543       builder.create<fir::IterWhileOp>(loc, lastChar, zero, minusOne, trueVal,
544                                        /*returnFinalCount=*/false, lastChar);
545   auto insPt = builder.saveInsertionPoint();
546   builder.setInsertionPointToStart(iterWhile.getBody());
547   auto index = iterWhile.getInductionVar();
548   // Look for first non-blank from the right of the character.
549   auto fromBuff = getCharBoxBuffer(str);
550   auto elemAddr = createElementAddr(fromBuff, index);
551   auto codeAddr =
552       builder.createConvert(loc, builder.getRefType(blank.getType()), elemAddr);
553   auto c = builder.create<fir::LoadOp>(loc, codeAddr);
554   auto isBlank = builder.create<mlir::arith::CmpIOp>(
555       loc, mlir::arith::CmpIPredicate::eq, blank, c);
556   llvm::SmallVector<mlir::Value> results = {isBlank, index};
557   builder.create<fir::ResultOp>(loc, results);
558   builder.restoreInsertionPoint(insPt);
559   // Compute length after iteration (zero if all blanks)
560   mlir::Value newLen =
561       builder.create<mlir::arith::AddIOp>(loc, iterWhile.getResult(1), one);
562   auto result = builder.create<mlir::arith::SelectOp>(
563       loc, iterWhile.getResult(0), zero, newLen);
564   return builder.createConvert(loc, builder.getCharacterLengthType(), result);
565 }
566 
567 fir::CharBoxValue
568 fir::factory::CharacterExprHelper::createCharacterTemp(mlir::Type type,
569                                                        int len) {
570   assert(len >= 0 && "expected positive length");
571   auto kind = recoverCharacterType(type).getFKind();
572   auto charType = fir::CharacterType::get(builder.getContext(), kind, len);
573   auto addr = builder.create<fir::AllocaOp>(loc, charType);
574   auto mlirLen =
575       builder.createIntegerConstant(loc, builder.getCharacterLengthType(), len);
576   return {addr, mlirLen};
577 }
578 
579 // Returns integer with code for blank. The integer has the same
580 // size as the character. Blank has ascii space code for all kinds.
581 mlir::Value fir::factory::CharacterExprHelper::createBlankConstantCode(
582     fir::CharacterType type) {
583   auto bits = builder.getKindMap().getCharacterBitsize(type.getFKind());
584   auto intType = builder.getIntegerType(bits);
585   return builder.createIntegerConstant(loc, intType, ' ');
586 }
587 
588 mlir::Value fir::factory::CharacterExprHelper::createBlankConstant(
589     fir::CharacterType type) {
590   return createSingletonFromCode(createBlankConstantCode(type),
591                                  type.getFKind());
592 }
593 
594 void fir::factory::CharacterExprHelper::createAssign(
595     const fir::ExtendedValue &lhs, const fir::ExtendedValue &rhs) {
596   if (auto *str = rhs.getBoxOf<fir::CharBoxValue>()) {
597     if (auto *to = lhs.getBoxOf<fir::CharBoxValue>()) {
598       createAssign(*to, *str);
599       return;
600     }
601   }
602   TODO(loc, "character array assignment");
603   // Note that it is not sure the array aspect should be handled
604   // by this utility.
605 }
606 
607 mlir::Value
608 fir::factory::CharacterExprHelper::createEmboxChar(mlir::Value addr,
609                                                    mlir::Value len) {
610   return createEmbox(fir::CharBoxValue{addr, len});
611 }
612 
613 std::pair<mlir::Value, mlir::Value>
614 fir::factory::CharacterExprHelper::createUnboxChar(mlir::Value boxChar) {
615   using T = std::pair<mlir::Value, mlir::Value>;
616   return toExtendedValue(boxChar).match(
617       [](const fir::CharBoxValue &b) -> T {
618         return {b.getBuffer(), b.getLen()};
619       },
620       [](const fir::CharArrayBoxValue &b) -> T {
621         return {b.getBuffer(), b.getLen()};
622       },
623       [](const auto &) -> T { llvm::report_fatal_error("not a character"); });
624 }
625 
626 bool fir::factory::CharacterExprHelper::isCharacterLiteral(mlir::Type type) {
627   if (auto seqType = type.dyn_cast<fir::SequenceType>())
628     return (seqType.getShape().size() == 1) &&
629            fir::isa_char(seqType.getEleTy());
630   return false;
631 }
632 
633 fir::KindTy
634 fir::factory::CharacterExprHelper::getCharacterKind(mlir::Type type) {
635   assert(isCharacterScalar(type) && "expected scalar character");
636   return recoverCharacterType(type).getFKind();
637 }
638 
639 fir::KindTy
640 fir::factory::CharacterExprHelper::getCharacterOrSequenceKind(mlir::Type type) {
641   return recoverCharacterType(type).getFKind();
642 }
643 
644 bool fir::factory::CharacterExprHelper::hasConstantLengthInType(
645     const fir::ExtendedValue &exv) {
646   auto charTy = recoverCharacterType(fir::getBase(exv).getType());
647   return charTy.hasConstantLen();
648 }
649 
650 mlir::Value
651 fir::factory::CharacterExprHelper::createSingletonFromCode(mlir::Value code,
652                                                            int kind) {
653   auto charType = fir::CharacterType::get(builder.getContext(), kind, 1);
654   auto bits = builder.getKindMap().getCharacterBitsize(kind);
655   auto intType = builder.getIntegerType(bits);
656   auto cast = builder.createConvert(loc, intType, code);
657   auto undef = builder.create<fir::UndefOp>(loc, charType);
658   auto zero = builder.getIntegerAttr(builder.getIndexType(), 0);
659   return builder.create<fir::InsertValueOp>(loc, charType, undef, cast,
660                                             builder.getArrayAttr(zero));
661 }
662 
663 mlir::Value fir::factory::CharacterExprHelper::extractCodeFromSingleton(
664     mlir::Value singleton) {
665   auto type = getCharacterType(singleton);
666   assert(type.getLen() == 1);
667   auto bits = builder.getKindMap().getCharacterBitsize(type.getFKind());
668   auto intType = builder.getIntegerType(bits);
669   auto zero = builder.getIntegerAttr(builder.getIndexType(), 0);
670   return builder.create<fir::ExtractValueOp>(loc, intType, singleton,
671                                              builder.getArrayAttr(zero));
672 }
673 
674 mlir::Value
675 fir::factory::CharacterExprHelper::readLengthFromBox(mlir::Value box) {
676   auto lenTy = builder.getCharacterLengthType();
677   auto size = builder.create<fir::BoxEleSizeOp>(loc, lenTy, box);
678   auto charTy = recoverCharacterType(box.getType());
679   auto bits = builder.getKindMap().getCharacterBitsize(charTy.getFKind());
680   auto width = bits / 8;
681   if (width > 1) {
682     auto widthVal = builder.createIntegerConstant(loc, lenTy, width);
683     return builder.create<mlir::arith::DivSIOp>(loc, size, widthVal);
684   }
685   return size;
686 }
687 
688 mlir::Value fir::factory::CharacterExprHelper::getLength(mlir::Value memref) {
689   auto memrefType = memref.getType();
690   auto charType = recoverCharacterType(memrefType);
691   assert(charType && "must be a character type");
692   if (charType.hasConstantLen())
693     return builder.createIntegerConstant(loc, builder.getCharacterLengthType(),
694                                          charType.getLen());
695   if (memrefType.isa<fir::BoxType>())
696     return readLengthFromBox(memref);
697   if (memrefType.isa<fir::BoxCharType>())
698     return createUnboxChar(memref).second;
699 
700   // Length cannot be deduced from memref.
701   return {};
702 }
703 
704 std::pair<mlir::Value, mlir::Value>
705 fir::factory::extractCharacterProcedureTuple(fir::FirOpBuilder &builder,
706                                              mlir::Location loc,
707                                              mlir::Value tuple) {
708   mlir::TupleType tupleType = tuple.getType().cast<mlir::TupleType>();
709   mlir::Value addr = builder.create<fir::ExtractValueOp>(
710       loc, tupleType.getType(0), tuple,
711       builder.getArrayAttr(
712           {builder.getIntegerAttr(builder.getIndexType(), 0)}));
713   mlir::Value proc = [&]() -> mlir::Value {
714     if (auto addrTy = addr.getType().dyn_cast<fir::BoxProcType>())
715       return builder.create<fir::BoxAddrOp>(loc, addrTy.getEleTy(), addr);
716     return addr;
717   }();
718   mlir::Value len = builder.create<fir::ExtractValueOp>(
719       loc, tupleType.getType(1), tuple,
720       builder.getArrayAttr(
721           {builder.getIntegerAttr(builder.getIndexType(), 1)}));
722   return {proc, len};
723 }
724 
725 mlir::Value fir::factory::createCharacterProcedureTuple(
726     fir::FirOpBuilder &builder, mlir::Location loc, mlir::Type argTy,
727     mlir::Value addr, mlir::Value len) {
728   mlir::TupleType tupleType = argTy.cast<mlir::TupleType>();
729   addr = builder.createConvert(loc, tupleType.getType(0), addr);
730   len = builder.createConvert(loc, tupleType.getType(1), len);
731   mlir::Value tuple = builder.create<fir::UndefOp>(loc, tupleType);
732   tuple = builder.create<fir::InsertValueOp>(
733       loc, tupleType, tuple, addr,
734       builder.getArrayAttr(
735           {builder.getIntegerAttr(builder.getIndexType(), 0)}));
736   tuple = builder.create<fir::InsertValueOp>(
737       loc, tupleType, tuple, len,
738       builder.getArrayAttr(
739           {builder.getIntegerAttr(builder.getIndexType(), 1)}));
740   return tuple;
741 }
742 
743 mlir::Type
744 fir::factory::getCharacterProcedureTupleType(mlir::Type funcPointerType) {
745   mlir::MLIRContext *context = funcPointerType.getContext();
746   mlir::Type lenType = mlir::IntegerType::get(context, 64);
747   return mlir::TupleType::get(context, {funcPointerType, lenType});
748 }
749