1 //===-- Character.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "flang/Optimizer/Builder/Character.h"
14 #include "flang/Lower/Todo.h"
15 #include "flang/Optimizer/Builder/DoLoopHelper.h"
16 #include "llvm/Support/Debug.h"
17 #include <optional>
18 
19 #define DEBUG_TYPE "flang-lower-character"
20 
21 using namespace mlir;
22 
23 //===----------------------------------------------------------------------===//
24 // CharacterExprHelper implementation
25 //===----------------------------------------------------------------------===//
26 
27 /// Unwrap base fir.char<kind,len> type.
28 static fir::CharacterType recoverCharacterType(mlir::Type type) {
29   if (auto boxType = type.dyn_cast<fir::BoxCharType>())
30     return boxType.getEleTy();
31   while (true) {
32     type = fir::unwrapRefType(type);
33     if (auto boxTy = type.dyn_cast<fir::BoxType>())
34       type = boxTy.getEleTy();
35     else
36       break;
37   }
38   return fir::unwrapSequenceType(type).cast<fir::CharacterType>();
39 }
40 
41 /// Get fir.char<kind> type with the same kind as inside str.
42 fir::CharacterType
43 fir::factory::CharacterExprHelper::getCharacterType(mlir::Type type) {
44   assert(isCharacterScalar(type) && "expected scalar character");
45   return recoverCharacterType(type);
46 }
47 
48 fir::CharacterType
49 fir::factory::CharacterExprHelper::getCharType(mlir::Type type) {
50   return recoverCharacterType(type);
51 }
52 
53 fir::CharacterType fir::factory::CharacterExprHelper::getCharacterType(
54     const fir::CharBoxValue &box) {
55   return getCharacterType(box.getBuffer().getType());
56 }
57 
58 fir::CharacterType
59 fir::factory::CharacterExprHelper::getCharacterType(mlir::Value str) {
60   return getCharacterType(str.getType());
61 }
62 
63 /// Determine the static size of the character. Returns the computed size, not
64 /// an IR Value.
65 static std::optional<fir::CharacterType::LenType>
66 getCompileTimeLength(const fir::CharBoxValue &box) {
67   auto len = recoverCharacterType(box.getBuffer().getType()).getLen();
68   if (len == fir::CharacterType::unknownLen())
69     return {};
70   return len;
71 }
72 
73 /// Detect the precondition that the value `str` does not reside in memory. Such
74 /// values will have a type `!fir.array<...x!fir.char<N>>` or `!fir.char<N>`.
75 LLVM_ATTRIBUTE_UNUSED static bool needToMaterialize(mlir::Value str) {
76   return str.getType().isa<fir::SequenceType>() || fir::isa_char(str.getType());
77 }
78 
79 /// Unwrap integer constant from mlir::Value.
80 static llvm::Optional<std::int64_t> getIntIfConstant(mlir::Value value) {
81   if (auto *definingOp = value.getDefiningOp())
82     if (auto cst = mlir::dyn_cast<mlir::arith::ConstantOp>(definingOp))
83       if (auto intAttr = cst.getValue().dyn_cast<mlir::IntegerAttr>())
84         return intAttr.getInt();
85   return {};
86 }
87 
88 /// This is called only if `str` does not reside in memory. Such a bare string
89 /// value will be converted into a memory-based temporary and an extended
90 /// boxchar value returned.
91 fir::CharBoxValue
92 fir::factory::CharacterExprHelper::materializeValue(mlir::Value str) {
93   assert(needToMaterialize(str));
94   auto ty = str.getType();
95   assert(isCharacterScalar(ty) && "expected scalar character");
96   auto charTy = ty.dyn_cast<fir::CharacterType>();
97   if (!charTy || charTy.getLen() == fir::CharacterType::unknownLen()) {
98     LLVM_DEBUG(llvm::dbgs() << "cannot materialize: " << str << '\n');
99     llvm_unreachable("must be a !fir.char<N> type");
100   }
101   auto len = builder.createIntegerConstant(
102       loc, builder.getCharacterLengthType(), charTy.getLen());
103   auto temp = builder.create<fir::AllocaOp>(loc, charTy);
104   builder.create<fir::StoreOp>(loc, str, temp);
105   LLVM_DEBUG(llvm::dbgs() << "materialized as local: " << str << " -> (" << temp
106                           << ", " << len << ")\n");
107   return {temp, len};
108 }
109 
110 fir::ExtendedValue
111 fir::factory::CharacterExprHelper::toExtendedValue(mlir::Value character,
112                                                    mlir::Value len) {
113   auto lenType = builder.getCharacterLengthType();
114   auto type = character.getType();
115   auto base = fir::isa_passbyref_type(type) ? character : mlir::Value{};
116   auto resultLen = len;
117   llvm::SmallVector<mlir::Value> extents;
118 
119   if (auto eleType = fir::dyn_cast_ptrEleTy(type))
120     type = eleType;
121 
122   if (auto arrayType = type.dyn_cast<fir::SequenceType>()) {
123     type = arrayType.getEleTy();
124     auto indexType = builder.getIndexType();
125     for (auto extent : arrayType.getShape()) {
126       if (extent == fir::SequenceType::getUnknownExtent())
127         break;
128       extents.emplace_back(
129           builder.createIntegerConstant(loc, indexType, extent));
130     }
131     // Last extent might be missing in case of assumed-size. If more extents
132     // could not be deduced from type, that's an error (a fir.box should
133     // have been used in the interface).
134     if (extents.size() + 1 < arrayType.getShape().size())
135       mlir::emitError(loc, "cannot retrieve array extents from type");
136   }
137 
138   if (auto charTy = type.dyn_cast<fir::CharacterType>()) {
139     if (!resultLen && charTy.getLen() != fir::CharacterType::unknownLen())
140       resultLen = builder.createIntegerConstant(loc, lenType, charTy.getLen());
141   } else if (auto boxCharType = type.dyn_cast<fir::BoxCharType>()) {
142     auto refType = builder.getRefType(boxCharType.getEleTy());
143     // If the embox is accessible, use its operand to avoid filling
144     // the generated fir with embox/unbox.
145     mlir::Value boxCharLen;
146     if (auto *definingOp = character.getDefiningOp()) {
147       if (auto box = dyn_cast<fir::EmboxCharOp>(definingOp)) {
148         base = box.getMemref();
149         boxCharLen = box.getLen();
150       }
151     }
152     if (!boxCharLen) {
153       auto unboxed =
154           builder.create<fir::UnboxCharOp>(loc, refType, lenType, character);
155       base = builder.createConvert(loc, refType, unboxed.getResult(0));
156       boxCharLen = unboxed.getResult(1);
157     }
158     if (!resultLen) {
159       resultLen = boxCharLen;
160     }
161   } else if (type.isa<fir::BoxType>()) {
162     mlir::emitError(loc, "descriptor or derived type not yet handled");
163   } else {
164     llvm_unreachable("Cannot translate mlir::Value to character ExtendedValue");
165   }
166 
167   if (!base) {
168     if (auto load =
169             mlir::dyn_cast_or_null<fir::LoadOp>(character.getDefiningOp())) {
170       base = load.getOperand();
171     } else {
172       return materializeValue(fir::getBase(character));
173     }
174   }
175   if (!resultLen)
176     llvm::report_fatal_error("no dynamic length found for character");
177   if (!extents.empty())
178     return fir::CharArrayBoxValue{base, resultLen, extents};
179   return fir::CharBoxValue{base, resultLen};
180 }
181 
182 static mlir::Type getSingletonCharType(mlir::MLIRContext *ctxt, int kind) {
183   return fir::CharacterType::getSingleton(ctxt, kind);
184 }
185 
186 mlir::Value
187 fir::factory::CharacterExprHelper::createEmbox(const fir::CharBoxValue &box) {
188   // Base CharBoxValue of CharArrayBoxValue are ok here (do not require a scalar
189   // type)
190   auto charTy = recoverCharacterType(box.getBuffer().getType());
191   auto boxCharType =
192       fir::BoxCharType::get(builder.getContext(), charTy.getFKind());
193   auto refType = fir::ReferenceType::get(boxCharType.getEleTy());
194   mlir::Value buff = box.getBuffer();
195   // fir.boxchar requires a memory reference. Allocate temp if the character is
196   // not in memory.
197   if (!fir::isa_ref_type(buff.getType())) {
198     auto temp = builder.createTemporary(loc, buff.getType());
199     builder.create<fir::StoreOp>(loc, buff, temp);
200     buff = temp;
201   }
202   buff = builder.createConvert(loc, refType, buff);
203   // Convert in case the provided length is not of the integer type that must
204   // be used in boxchar.
205   auto len = builder.createConvert(loc, builder.getCharacterLengthType(),
206                                    box.getLen());
207   return builder.create<fir::EmboxCharOp>(loc, boxCharType, buff, len);
208 }
209 
210 fir::CharBoxValue fir::factory::CharacterExprHelper::toScalarCharacter(
211     const fir::CharArrayBoxValue &box) {
212   if (box.getBuffer().getType().isa<fir::PointerType>())
213     TODO(loc, "concatenating non contiguous character array into a scalar");
214 
215   // TODO: add a fast path multiplying new length at compile time if the info is
216   // in the array type.
217   auto lenType = builder.getCharacterLengthType();
218   auto len = builder.createConvert(loc, lenType, box.getLen());
219   for (auto extent : box.getExtents())
220     len = builder.create<arith::MulIOp>(
221         loc, len, builder.createConvert(loc, lenType, extent));
222 
223   // TODO: typeLen can be improved in compiled constant cases
224   // TODO: allow bare fir.array<> (no ref) conversion here ?
225   auto typeLen = fir::CharacterType::unknownLen();
226   auto kind = recoverCharacterType(box.getBuffer().getType()).getFKind();
227   auto charTy = fir::CharacterType::get(builder.getContext(), kind, typeLen);
228   auto type = fir::ReferenceType::get(charTy);
229   auto buffer = builder.createConvert(loc, type, box.getBuffer());
230   return {buffer, len};
231 }
232 
233 mlir::Value fir::factory::CharacterExprHelper::createEmbox(
234     const fir::CharArrayBoxValue &box) {
235   // Use same embox as for scalar. It's losing the actual data size information
236   // (We do not multiply the length by the array size), but that is what Fortran
237   // call interfaces using boxchar expect.
238   return createEmbox(static_cast<const fir::CharBoxValue &>(box));
239 }
240 
241 /// Get the address of the element at position \p index of the scalar character
242 /// \p buffer.
243 /// \p buffer must be of type !fir.ref<fir.char<k, len>>. The length may be
244 /// unknown. \p index must have any integer type, and is zero based. The return
245 /// value is a singleton address (!fir.ref<!fir.char<kind>>)
246 mlir::Value
247 fir::factory::CharacterExprHelper::createElementAddr(mlir::Value buffer,
248                                                      mlir::Value index) {
249   // The only way to address an element of a fir.ref<char<kind, len>> is to cast
250   // it to a fir.array<len x fir.char<kind>> and use fir.coordinate_of.
251   auto bufferType = buffer.getType();
252   assert(fir::isa_ref_type(bufferType));
253   assert(isCharacterScalar(bufferType));
254   auto charTy = recoverCharacterType(bufferType);
255   auto singleTy = getSingletonCharType(builder.getContext(), charTy.getFKind());
256   auto singleRefTy = builder.getRefType(singleTy);
257   auto extent = fir::SequenceType::getUnknownExtent();
258   if (charTy.getLen() != fir::CharacterType::unknownLen())
259     extent = charTy.getLen();
260   auto coorTy = builder.getRefType(fir::SequenceType::get({extent}, singleTy));
261 
262   auto coor = builder.createConvert(loc, coorTy, buffer);
263   auto i = builder.createConvert(loc, builder.getIndexType(), index);
264   return builder.create<fir::CoordinateOp>(loc, singleRefTy, coor, i);
265 }
266 
267 /// Load a character out of `buff` from offset `index`.
268 /// `buff` must be a reference to memory.
269 mlir::Value
270 fir::factory::CharacterExprHelper::createLoadCharAt(mlir::Value buff,
271                                                     mlir::Value index) {
272   LLVM_DEBUG(llvm::dbgs() << "load a char: " << buff << " type: "
273                           << buff.getType() << " at: " << index << '\n');
274   return builder.create<fir::LoadOp>(loc, createElementAddr(buff, index));
275 }
276 
277 /// Store the singleton character `c` to `str` at offset `index`.
278 /// `str` must be a reference to memory.
279 void fir::factory::CharacterExprHelper::createStoreCharAt(mlir::Value str,
280                                                           mlir::Value index,
281                                                           mlir::Value c) {
282   LLVM_DEBUG(llvm::dbgs() << "store the char: " << c << " into: " << str
283                           << " type: " << str.getType() << " at: " << index
284                           << '\n');
285   auto addr = createElementAddr(str, index);
286   builder.create<fir::StoreOp>(loc, c, addr);
287 }
288 
289 // FIXME: this temp is useless... either fir.coordinate_of needs to
290 // work on "loaded" characters (!fir.array<len x fir.char<kind>>) or
291 // character should never be loaded.
292 // If this is a fir.array<>, allocate and store the value so that
293 // fir.cooridnate_of can be use on the value.
294 mlir::Value fir::factory::CharacterExprHelper::getCharBoxBuffer(
295     const fir::CharBoxValue &box) {
296   auto buff = box.getBuffer();
297   if (fir::isa_char(buff.getType())) {
298     auto newBuff = builder.create<fir::AllocaOp>(loc, buff.getType());
299     builder.create<fir::StoreOp>(loc, buff, newBuff);
300     return newBuff;
301   }
302   return buff;
303 }
304 
305 /// Get the LLVM intrinsic for `memcpy`. Use the 64 bit version.
306 mlir::FuncOp fir::factory::getLlvmMemcpy(fir::FirOpBuilder &builder) {
307   auto ptrTy = builder.getRefType(builder.getIntegerType(8));
308   llvm::SmallVector<mlir::Type> args = {ptrTy, ptrTy, builder.getI64Type(),
309                                         builder.getI1Type()};
310   auto memcpyTy =
311       mlir::FunctionType::get(builder.getContext(), args, llvm::None);
312   return builder.addNamedFunction(builder.getUnknownLoc(),
313                                   "llvm.memcpy.p0i8.p0i8.i64", memcpyTy);
314 }
315 
316 /// Get the LLVM intrinsic for `memmove`. Use the 64 bit version.
317 mlir::FuncOp fir::factory::getLlvmMemmove(fir::FirOpBuilder &builder) {
318   auto ptrTy = builder.getRefType(builder.getIntegerType(8));
319   llvm::SmallVector<mlir::Type> args = {ptrTy, ptrTy, builder.getI64Type(),
320                                         builder.getI1Type()};
321   auto memmoveTy =
322       mlir::FunctionType::get(builder.getContext(), args, llvm::None);
323   return builder.addNamedFunction(builder.getUnknownLoc(),
324                                   "llvm.memmove.p0i8.p0i8.i64", memmoveTy);
325 }
326 
327 /// Get the LLVM intrinsic for `memset`. Use the 64 bit version.
328 mlir::FuncOp fir::factory::getLlvmMemset(fir::FirOpBuilder &builder) {
329   auto ptrTy = builder.getRefType(builder.getIntegerType(8));
330   llvm::SmallVector<mlir::Type> args = {ptrTy, ptrTy, builder.getI64Type(),
331                                         builder.getI1Type()};
332   auto memsetTy =
333       mlir::FunctionType::get(builder.getContext(), args, llvm::None);
334   return builder.addNamedFunction(builder.getUnknownLoc(),
335                                   "llvm.memset.p0i8.p0i8.i64", memsetTy);
336 }
337 
338 /// Get the standard `realloc` function.
339 mlir::FuncOp fir::factory::getRealloc(fir::FirOpBuilder &builder) {
340   auto ptrTy = builder.getRefType(builder.getIntegerType(8));
341   llvm::SmallVector<mlir::Type> args = {ptrTy, builder.getI64Type()};
342   auto reallocTy = mlir::FunctionType::get(builder.getContext(), args, {ptrTy});
343   return builder.addNamedFunction(builder.getUnknownLoc(), "realloc",
344                                   reallocTy);
345 }
346 
347 /// Create a loop to copy `count` characters from `src` to `dest`. Note that the
348 /// KIND indicates the number of bits in a code point. (ASCII, UCS-2, or UCS-4.)
349 void fir::factory::CharacterExprHelper::createCopy(
350     const fir::CharBoxValue &dest, const fir::CharBoxValue &src,
351     mlir::Value count) {
352   auto fromBuff = getCharBoxBuffer(src);
353   auto toBuff = getCharBoxBuffer(dest);
354   LLVM_DEBUG(llvm::dbgs() << "create char copy from: "; src.dump();
355              llvm::dbgs() << " to: "; dest.dump();
356              llvm::dbgs() << " count: " << count << '\n');
357   auto kind = getCharacterKind(src.getBuffer().getType());
358   // If the src and dest are the same KIND, then use memmove to move the bits.
359   // We don't have to worry about overlapping ranges with memmove.
360   if (getCharacterKind(dest.getBuffer().getType()) == kind) {
361     auto bytes = builder.getKindMap().getCharacterBitsize(kind) / 8;
362     auto i64Ty = builder.getI64Type();
363     auto kindBytes = builder.createIntegerConstant(loc, i64Ty, bytes);
364     auto castCount = builder.createConvert(loc, i64Ty, count);
365     auto totalBytes = builder.create<arith::MulIOp>(loc, kindBytes, castCount);
366     auto notVolatile = builder.createBool(loc, false);
367     auto memmv = getLlvmMemmove(builder);
368     auto argTys = memmv.getType().getInputs();
369     auto toPtr = builder.createConvert(loc, argTys[0], toBuff);
370     auto fromPtr = builder.createConvert(loc, argTys[1], fromBuff);
371     builder.create<fir::CallOp>(
372         loc, memmv, mlir::ValueRange{toPtr, fromPtr, totalBytes, notVolatile});
373     return;
374   }
375 
376   // Convert a CHARACTER of one KIND into a CHARACTER of another KIND.
377   builder.create<fir::CharConvertOp>(loc, src.getBuffer(), count,
378                                      dest.getBuffer());
379 }
380 
381 void fir::factory::CharacterExprHelper::createPadding(
382     const fir::CharBoxValue &str, mlir::Value lower, mlir::Value upper) {
383   auto blank = createBlankConstant(getCharacterType(str));
384   // Always create the loop, if upper < lower, no iteration will be
385   // executed.
386   auto toBuff = getCharBoxBuffer(str);
387   fir::factory::DoLoopHelper{builder, loc}.createLoop(
388       lower, upper, [&](fir::FirOpBuilder &, mlir::Value index) {
389         createStoreCharAt(toBuff, index, blank);
390       });
391 }
392 
393 fir::CharBoxValue
394 fir::factory::CharacterExprHelper::createCharacterTemp(mlir::Type type,
395                                                        mlir::Value len) {
396   auto kind = recoverCharacterType(type).getFKind();
397   auto typeLen = fir::CharacterType::unknownLen();
398   // If len is a constant, reflect the length in the type.
399   if (auto cstLen = getIntIfConstant(len))
400     typeLen = *cstLen;
401   auto *ctxt = builder.getContext();
402   auto charTy = fir::CharacterType::get(ctxt, kind, typeLen);
403   llvm::SmallVector<mlir::Value> lenParams;
404   if (typeLen == fir::CharacterType::unknownLen())
405     lenParams.push_back(len);
406   auto ref = builder.allocateLocal(loc, charTy, "", ".chrtmp",
407                                    /*shape=*/llvm::None, lenParams);
408   return {ref, len};
409 }
410 
411 fir::CharBoxValue fir::factory::CharacterExprHelper::createTempFrom(
412     const fir::ExtendedValue &source) {
413   const auto *charBox = source.getCharBox();
414   if (!charBox)
415     fir::emitFatalError(loc, "source must be a fir::CharBoxValue");
416   auto len = charBox->getLen();
417   auto sourceTy = charBox->getBuffer().getType();
418   auto temp = createCharacterTemp(sourceTy, len);
419   if (fir::isa_ref_type(sourceTy)) {
420     createCopy(temp, *charBox, len);
421   } else {
422     auto ref = builder.createConvert(loc, builder.getRefType(sourceTy),
423                                      temp.getBuffer());
424     builder.create<fir::StoreOp>(loc, charBox->getBuffer(), ref);
425   }
426   return temp;
427 }
428 
429 // Simple length one character assignment without loops.
430 void fir::factory::CharacterExprHelper::createLengthOneAssign(
431     const fir::CharBoxValue &lhs, const fir::CharBoxValue &rhs) {
432   auto addr = lhs.getBuffer();
433   mlir::Value val = builder.create<fir::LoadOp>(loc, rhs.getBuffer());
434   auto addrTy = builder.getRefType(val.getType());
435   addr = builder.createConvert(loc, addrTy, addr);
436   builder.create<fir::StoreOp>(loc, val, addr);
437 }
438 
439 /// Returns the minimum of integer mlir::Value \p a and \b.
440 mlir::Value genMin(fir::FirOpBuilder &builder, mlir::Location loc,
441                    mlir::Value a, mlir::Value b) {
442   auto cmp =
443       builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::slt, a, b);
444   return builder.create<mlir::arith::SelectOp>(loc, cmp, a, b);
445 }
446 
447 void fir::factory::CharacterExprHelper::createAssign(
448     const fir::CharBoxValue &lhs, const fir::CharBoxValue &rhs) {
449   auto rhsCstLen = getCompileTimeLength(rhs);
450   auto lhsCstLen = getCompileTimeLength(lhs);
451   bool compileTimeSameLength =
452       lhsCstLen && rhsCstLen && *lhsCstLen == *rhsCstLen;
453 
454   if (compileTimeSameLength && *lhsCstLen == 1) {
455     createLengthOneAssign(lhs, rhs);
456     return;
457   }
458 
459   // Copy the minimum of the lhs and rhs lengths and pad the lhs remainder
460   // if needed.
461   auto copyCount = lhs.getLen();
462   auto idxTy = builder.getIndexType();
463   if (!compileTimeSameLength) {
464     auto lhsLen = builder.createConvert(loc, idxTy, lhs.getLen());
465     auto rhsLen = builder.createConvert(loc, idxTy, rhs.getLen());
466     copyCount = genMin(builder, loc, lhsLen, rhsLen);
467   }
468 
469   // Actual copy
470   createCopy(lhs, rhs, copyCount);
471 
472   // Pad if needed.
473   if (!compileTimeSameLength) {
474     auto one = builder.createIntegerConstant(loc, lhs.getLen().getType(), 1);
475     auto maxPadding = builder.create<arith::SubIOp>(loc, lhs.getLen(), one);
476     createPadding(lhs, copyCount, maxPadding);
477   }
478 }
479 
480 fir::CharBoxValue fir::factory::CharacterExprHelper::createConcatenate(
481     const fir::CharBoxValue &lhs, const fir::CharBoxValue &rhs) {
482   auto lhsLen = builder.createConvert(loc, builder.getCharacterLengthType(),
483                                       lhs.getLen());
484   auto rhsLen = builder.createConvert(loc, builder.getCharacterLengthType(),
485                                       rhs.getLen());
486   mlir::Value len = builder.create<arith::AddIOp>(loc, lhsLen, rhsLen);
487   auto temp = createCharacterTemp(getCharacterType(rhs), len);
488   createCopy(temp, lhs, lhsLen);
489   auto one = builder.createIntegerConstant(loc, len.getType(), 1);
490   auto upperBound = builder.create<arith::SubIOp>(loc, len, one);
491   auto lhsLenIdx = builder.createConvert(loc, builder.getIndexType(), lhsLen);
492   auto fromBuff = getCharBoxBuffer(rhs);
493   auto toBuff = getCharBoxBuffer(temp);
494   fir::factory::DoLoopHelper{builder, loc}.createLoop(
495       lhsLenIdx, upperBound, one,
496       [&](fir::FirOpBuilder &bldr, mlir::Value index) {
497         auto rhsIndex = bldr.create<arith::SubIOp>(loc, index, lhsLenIdx);
498         auto charVal = createLoadCharAt(fromBuff, rhsIndex);
499         createStoreCharAt(toBuff, index, charVal);
500       });
501   return temp;
502 }
503 
504 fir::CharBoxValue fir::factory::CharacterExprHelper::createSubstring(
505     const fir::CharBoxValue &box, llvm::ArrayRef<mlir::Value> bounds) {
506   // Constant need to be materialize in memory to use fir.coordinate_of.
507   auto nbounds = bounds.size();
508   if (nbounds < 1 || nbounds > 2) {
509     mlir::emitError(loc, "Incorrect number of bounds in substring");
510     return {mlir::Value{}, mlir::Value{}};
511   }
512   mlir::SmallVector<mlir::Value> castBounds;
513   // Convert bounds to length type to do safe arithmetic on it.
514   for (auto bound : bounds)
515     castBounds.push_back(
516         builder.createConvert(loc, builder.getCharacterLengthType(), bound));
517   auto lowerBound = castBounds[0];
518   // FIR CoordinateOp is zero based but Fortran substring are one based.
519   auto one = builder.createIntegerConstant(loc, lowerBound.getType(), 1);
520   auto offset = builder.create<arith::SubIOp>(loc, lowerBound, one).getResult();
521   auto addr = createElementAddr(box.getBuffer(), offset);
522   auto kind = getCharacterKind(box.getBuffer().getType());
523   auto charTy = fir::CharacterType::getUnknownLen(builder.getContext(), kind);
524   auto resultType = builder.getRefType(charTy);
525   auto substringRef = builder.createConvert(loc, resultType, addr);
526 
527   // Compute the length.
528   mlir::Value substringLen;
529   if (nbounds < 2) {
530     substringLen =
531         builder.create<arith::SubIOp>(loc, box.getLen(), castBounds[0]);
532   } else {
533     substringLen =
534         builder.create<arith::SubIOp>(loc, castBounds[1], castBounds[0]);
535   }
536   substringLen = builder.create<arith::AddIOp>(loc, substringLen, one);
537 
538   // Set length to zero if bounds were reversed (Fortran 2018 9.4.1)
539   auto zero = builder.createIntegerConstant(loc, substringLen.getType(), 0);
540   auto cdt = builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::slt,
541                                            substringLen, zero);
542   substringLen =
543       builder.create<mlir::arith::SelectOp>(loc, cdt, zero, substringLen);
544 
545   return {substringRef, substringLen};
546 }
547 
548 mlir::Value
549 fir::factory::CharacterExprHelper::createLenTrim(const fir::CharBoxValue &str) {
550   // Note: Runtime for LEN_TRIM should also be available at some
551   // point. For now use an inlined implementation.
552   auto indexType = builder.getIndexType();
553   auto len = builder.createConvert(loc, indexType, str.getLen());
554   auto one = builder.createIntegerConstant(loc, indexType, 1);
555   auto minusOne = builder.createIntegerConstant(loc, indexType, -1);
556   auto zero = builder.createIntegerConstant(loc, indexType, 0);
557   auto trueVal = builder.createIntegerConstant(loc, builder.getI1Type(), 1);
558   auto blank = createBlankConstantCode(getCharacterType(str));
559   mlir::Value lastChar = builder.create<arith::SubIOp>(loc, len, one);
560 
561   auto iterWhile =
562       builder.create<fir::IterWhileOp>(loc, lastChar, zero, minusOne, trueVal,
563                                        /*returnFinalCount=*/false, lastChar);
564   auto insPt = builder.saveInsertionPoint();
565   builder.setInsertionPointToStart(iterWhile.getBody());
566   auto index = iterWhile.getInductionVar();
567   // Look for first non-blank from the right of the character.
568   auto fromBuff = getCharBoxBuffer(str);
569   auto elemAddr = createElementAddr(fromBuff, index);
570   auto codeAddr =
571       builder.createConvert(loc, builder.getRefType(blank.getType()), elemAddr);
572   auto c = builder.create<fir::LoadOp>(loc, codeAddr);
573   auto isBlank =
574       builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::eq, blank, c);
575   llvm::SmallVector<mlir::Value> results = {isBlank, index};
576   builder.create<fir::ResultOp>(loc, results);
577   builder.restoreInsertionPoint(insPt);
578   // Compute length after iteration (zero if all blanks)
579   mlir::Value newLen =
580       builder.create<arith::AddIOp>(loc, iterWhile.getResult(1), one);
581   auto result = builder.create<mlir::arith::SelectOp>(
582       loc, iterWhile.getResult(0), zero, newLen);
583   return builder.createConvert(loc, builder.getCharacterLengthType(), result);
584 }
585 
586 fir::CharBoxValue
587 fir::factory::CharacterExprHelper::createCharacterTemp(mlir::Type type,
588                                                        int len) {
589   assert(len >= 0 && "expected positive length");
590   auto kind = recoverCharacterType(type).getFKind();
591   auto charType = fir::CharacterType::get(builder.getContext(), kind, len);
592   auto addr = builder.create<fir::AllocaOp>(loc, charType);
593   auto mlirLen =
594       builder.createIntegerConstant(loc, builder.getCharacterLengthType(), len);
595   return {addr, mlirLen};
596 }
597 
598 // Returns integer with code for blank. The integer has the same
599 // size as the character. Blank has ascii space code for all kinds.
600 mlir::Value fir::factory::CharacterExprHelper::createBlankConstantCode(
601     fir::CharacterType type) {
602   auto bits = builder.getKindMap().getCharacterBitsize(type.getFKind());
603   auto intType = builder.getIntegerType(bits);
604   return builder.createIntegerConstant(loc, intType, ' ');
605 }
606 
607 mlir::Value fir::factory::CharacterExprHelper::createBlankConstant(
608     fir::CharacterType type) {
609   return createSingletonFromCode(createBlankConstantCode(type),
610                                  type.getFKind());
611 }
612 
613 void fir::factory::CharacterExprHelper::createAssign(
614     const fir::ExtendedValue &lhs, const fir::ExtendedValue &rhs) {
615   if (auto *str = rhs.getBoxOf<fir::CharBoxValue>()) {
616     if (auto *to = lhs.getBoxOf<fir::CharBoxValue>()) {
617       createAssign(*to, *str);
618       return;
619     }
620   }
621   TODO(loc, "character array assignment");
622   // Note that it is not sure the array aspect should be handled
623   // by this utility.
624 }
625 
626 mlir::Value
627 fir::factory::CharacterExprHelper::createEmboxChar(mlir::Value addr,
628                                                    mlir::Value len) {
629   return createEmbox(fir::CharBoxValue{addr, len});
630 }
631 
632 std::pair<mlir::Value, mlir::Value>
633 fir::factory::CharacterExprHelper::createUnboxChar(mlir::Value boxChar) {
634   using T = std::pair<mlir::Value, mlir::Value>;
635   return toExtendedValue(boxChar).match(
636       [](const fir::CharBoxValue &b) -> T {
637         return {b.getBuffer(), b.getLen()};
638       },
639       [](const fir::CharArrayBoxValue &b) -> T {
640         return {b.getBuffer(), b.getLen()};
641       },
642       [](const auto &) -> T { llvm::report_fatal_error("not a character"); });
643 }
644 
645 bool fir::factory::CharacterExprHelper::isCharacterLiteral(mlir::Type type) {
646   if (auto seqType = type.dyn_cast<fir::SequenceType>())
647     return (seqType.getShape().size() == 1) &&
648            fir::isa_char(seqType.getEleTy());
649   return false;
650 }
651 
652 bool fir::factory::CharacterExprHelper::isCharacterScalar(mlir::Type type) {
653   if (type.isa<fir::BoxCharType>())
654     return true;
655   type = fir::unwrapRefType(type);
656   if (auto boxTy = type.dyn_cast<fir::BoxType>())
657     type = boxTy.getEleTy();
658   type = fir::unwrapRefType(type);
659   return !type.isa<fir::SequenceType>() && fir::isa_char(type);
660 }
661 
662 fir::KindTy
663 fir::factory::CharacterExprHelper::getCharacterKind(mlir::Type type) {
664   assert(isCharacterScalar(type) && "expected scalar character");
665   return recoverCharacterType(type).getFKind();
666 }
667 
668 fir::KindTy
669 fir::factory::CharacterExprHelper::getCharacterOrSequenceKind(mlir::Type type) {
670   return recoverCharacterType(type).getFKind();
671 }
672 
673 bool fir::factory::CharacterExprHelper::isArray(mlir::Type type) {
674   return !isCharacterScalar(type);
675 }
676 
677 bool fir::factory::CharacterExprHelper::hasConstantLengthInType(
678     const fir::ExtendedValue &exv) {
679   auto charTy = recoverCharacterType(fir::getBase(exv).getType());
680   return charTy.hasConstantLen();
681 }
682 
683 mlir::Value
684 fir::factory::CharacterExprHelper::createSingletonFromCode(mlir::Value code,
685                                                            int kind) {
686   auto charType = fir::CharacterType::get(builder.getContext(), kind, 1);
687   auto bits = builder.getKindMap().getCharacterBitsize(kind);
688   auto intType = builder.getIntegerType(bits);
689   auto cast = builder.createConvert(loc, intType, code);
690   auto undef = builder.create<fir::UndefOp>(loc, charType);
691   auto zero = builder.getIntegerAttr(builder.getIndexType(), 0);
692   return builder.create<fir::InsertValueOp>(loc, charType, undef, cast,
693                                             builder.getArrayAttr(zero));
694 }
695 
696 mlir::Value fir::factory::CharacterExprHelper::extractCodeFromSingleton(
697     mlir::Value singleton) {
698   auto type = getCharacterType(singleton);
699   assert(type.getLen() == 1);
700   auto bits = builder.getKindMap().getCharacterBitsize(type.getFKind());
701   auto intType = builder.getIntegerType(bits);
702   auto zero = builder.getIntegerAttr(builder.getIndexType(), 0);
703   return builder.create<fir::ExtractValueOp>(loc, intType, singleton,
704                                              builder.getArrayAttr(zero));
705 }
706 
707 mlir::Value
708 fir::factory::CharacterExprHelper::readLengthFromBox(mlir::Value box) {
709   auto lenTy = builder.getCharacterLengthType();
710   auto size = builder.create<fir::BoxEleSizeOp>(loc, lenTy, box);
711   auto charTy = recoverCharacterType(box.getType());
712   auto bits = builder.getKindMap().getCharacterBitsize(charTy.getFKind());
713   auto width = bits / 8;
714   if (width > 1) {
715     auto widthVal = builder.createIntegerConstant(loc, lenTy, width);
716     return builder.create<arith::DivSIOp>(loc, size, widthVal);
717   }
718   return size;
719 }
720 
721 mlir::Value fir::factory::CharacterExprHelper::getLength(mlir::Value memref) {
722   auto memrefType = memref.getType();
723   auto charType = recoverCharacterType(memrefType);
724   assert(charType && "must be a character type");
725   if (charType.hasConstantLen())
726     return builder.createIntegerConstant(loc, builder.getCharacterLengthType(),
727                                          charType.getLen());
728   if (memrefType.isa<fir::BoxType>())
729     return readLengthFromBox(memref);
730   if (memrefType.isa<fir::BoxCharType>())
731     return createUnboxChar(memref).second;
732 
733   // Length cannot be deduced from memref.
734   return {};
735 }
736 
737 std::pair<mlir::Value, mlir::Value>
738 fir::factory::extractCharacterProcedureTuple(fir::FirOpBuilder &builder,
739                                              mlir::Location loc,
740                                              mlir::Value tuple) {
741   mlir::TupleType tupleType = tuple.getType().cast<mlir::TupleType>();
742   mlir::Value addr = builder.create<fir::ExtractValueOp>(
743       loc, tupleType.getType(0), tuple,
744       builder.getArrayAttr(
745           {builder.getIntegerAttr(builder.getIndexType(), 0)}));
746   mlir::Value len = builder.create<fir::ExtractValueOp>(
747       loc, tupleType.getType(1), tuple,
748       builder.getArrayAttr(
749           {builder.getIntegerAttr(builder.getIndexType(), 1)}));
750   return {addr, len};
751 }
752 
753 mlir::Value fir::factory::createCharacterProcedureTuple(
754     fir::FirOpBuilder &builder, mlir::Location loc, mlir::Type argTy,
755     mlir::Value addr, mlir::Value len) {
756   mlir::TupleType tupleType = argTy.cast<mlir::TupleType>();
757   addr = builder.createConvert(loc, tupleType.getType(0), addr);
758   len = builder.createConvert(loc, tupleType.getType(1), len);
759   mlir::Value tuple = builder.create<fir::UndefOp>(loc, tupleType);
760   tuple = builder.create<fir::InsertValueOp>(
761       loc, tupleType, tuple, addr,
762       builder.getArrayAttr(
763           {builder.getIntegerAttr(builder.getIndexType(), 0)}));
764   tuple = builder.create<fir::InsertValueOp>(
765       loc, tupleType, tuple, len,
766       builder.getArrayAttr(
767           {builder.getIntegerAttr(builder.getIndexType(), 1)}));
768   return tuple;
769 }
770 
771 bool fir::factory::isCharacterProcedureTuple(mlir::Type ty) {
772   mlir::TupleType tuple = ty.dyn_cast<mlir::TupleType>();
773   return tuple && tuple.size() == 2 &&
774          tuple.getType(0).isa<mlir::FunctionType>() &&
775          fir::isa_integer(tuple.getType(1));
776 }
777 
778 mlir::Type
779 fir::factory::getCharacterProcedureTupleType(mlir::Type funcPointerType) {
780   mlir::MLIRContext *context = funcPointerType.getContext();
781   mlir::Type lenType = mlir::IntegerType::get(context, 64);
782   return mlir::TupleType::get(context, {funcPointerType, lenType});
783 }
784