1 //===-- Character.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "flang/Optimizer/Builder/Character.h"
14 #include "flang/Lower/Todo.h"
15 #include "flang/Optimizer/Builder/DoLoopHelper.h"
16 #include "llvm/Support/Debug.h"
17 #include <optional>
18 
19 #define DEBUG_TYPE "flang-lower-character"
20 
21 //===----------------------------------------------------------------------===//
22 // CharacterExprHelper implementation
23 //===----------------------------------------------------------------------===//
24 
25 /// Unwrap base fir.char<kind,len> type.
26 static fir::CharacterType recoverCharacterType(mlir::Type type) {
27   if (auto boxType = type.dyn_cast<fir::BoxCharType>())
28     return boxType.getEleTy();
29   while (true) {
30     type = fir::unwrapRefType(type);
31     if (auto boxTy = type.dyn_cast<fir::BoxType>())
32       type = boxTy.getEleTy();
33     else
34       break;
35   }
36   return fir::unwrapSequenceType(type).cast<fir::CharacterType>();
37 }
38 
39 /// Get fir.char<kind> type with the same kind as inside str.
40 fir::CharacterType
41 fir::factory::CharacterExprHelper::getCharacterType(mlir::Type type) {
42   assert(isCharacterScalar(type) && "expected scalar character");
43   return recoverCharacterType(type);
44 }
45 
46 fir::CharacterType
47 fir::factory::CharacterExprHelper::getCharType(mlir::Type type) {
48   return recoverCharacterType(type);
49 }
50 
51 fir::CharacterType fir::factory::CharacterExprHelper::getCharacterType(
52     const fir::CharBoxValue &box) {
53   return getCharacterType(box.getBuffer().getType());
54 }
55 
56 fir::CharacterType
57 fir::factory::CharacterExprHelper::getCharacterType(mlir::Value str) {
58   return getCharacterType(str.getType());
59 }
60 
61 /// Determine the static size of the character. Returns the computed size, not
62 /// an IR Value.
63 static std::optional<fir::CharacterType::LenType>
64 getCompileTimeLength(const fir::CharBoxValue &box) {
65   auto len = recoverCharacterType(box.getBuffer().getType()).getLen();
66   if (len == fir::CharacterType::unknownLen())
67     return {};
68   return len;
69 }
70 
71 /// Detect the precondition that the value `str` does not reside in memory. Such
72 /// values will have a type `!fir.array<...x!fir.char<N>>` or `!fir.char<N>`.
73 LLVM_ATTRIBUTE_UNUSED static bool needToMaterialize(mlir::Value str) {
74   return str.getType().isa<fir::SequenceType>() || fir::isa_char(str.getType());
75 }
76 
77 /// Unwrap integer constant from mlir::Value.
78 static llvm::Optional<std::int64_t> getIntIfConstant(mlir::Value value) {
79   if (auto *definingOp = value.getDefiningOp())
80     if (auto cst = mlir::dyn_cast<mlir::arith::ConstantOp>(definingOp))
81       if (auto intAttr = cst.getValue().dyn_cast<mlir::IntegerAttr>())
82         return intAttr.getInt();
83   return {};
84 }
85 
86 /// This is called only if `str` does not reside in memory. Such a bare string
87 /// value will be converted into a memory-based temporary and an extended
88 /// boxchar value returned.
89 fir::CharBoxValue
90 fir::factory::CharacterExprHelper::materializeValue(mlir::Value str) {
91   assert(needToMaterialize(str));
92   auto ty = str.getType();
93   assert(isCharacterScalar(ty) && "expected scalar character");
94   auto charTy = ty.dyn_cast<fir::CharacterType>();
95   if (!charTy || charTy.getLen() == fir::CharacterType::unknownLen()) {
96     LLVM_DEBUG(llvm::dbgs() << "cannot materialize: " << str << '\n');
97     llvm_unreachable("must be a !fir.char<N> type");
98   }
99   auto len = builder.createIntegerConstant(
100       loc, builder.getCharacterLengthType(), charTy.getLen());
101   auto temp = builder.create<fir::AllocaOp>(loc, charTy);
102   builder.create<fir::StoreOp>(loc, str, temp);
103   LLVM_DEBUG(llvm::dbgs() << "materialized as local: " << str << " -> (" << temp
104                           << ", " << len << ")\n");
105   return {temp, len};
106 }
107 
108 fir::ExtendedValue
109 fir::factory::CharacterExprHelper::toExtendedValue(mlir::Value character,
110                                                    mlir::Value len) {
111   auto lenType = builder.getCharacterLengthType();
112   auto type = character.getType();
113   auto base = fir::isa_passbyref_type(type) ? character : mlir::Value{};
114   auto resultLen = len;
115   llvm::SmallVector<mlir::Value> extents;
116 
117   if (auto eleType = fir::dyn_cast_ptrEleTy(type))
118     type = eleType;
119 
120   if (auto arrayType = type.dyn_cast<fir::SequenceType>()) {
121     type = arrayType.getEleTy();
122     auto indexType = builder.getIndexType();
123     for (auto extent : arrayType.getShape()) {
124       if (extent == fir::SequenceType::getUnknownExtent())
125         break;
126       extents.emplace_back(
127           builder.createIntegerConstant(loc, indexType, extent));
128     }
129     // Last extent might be missing in case of assumed-size. If more extents
130     // could not be deduced from type, that's an error (a fir.box should
131     // have been used in the interface).
132     if (extents.size() + 1 < arrayType.getShape().size())
133       mlir::emitError(loc, "cannot retrieve array extents from type");
134   }
135 
136   if (auto charTy = type.dyn_cast<fir::CharacterType>()) {
137     if (!resultLen && charTy.getLen() != fir::CharacterType::unknownLen())
138       resultLen = builder.createIntegerConstant(loc, lenType, charTy.getLen());
139   } else if (auto boxCharType = type.dyn_cast<fir::BoxCharType>()) {
140     auto refType = builder.getRefType(boxCharType.getEleTy());
141     // If the embox is accessible, use its operand to avoid filling
142     // the generated fir with embox/unbox.
143     mlir::Value boxCharLen;
144     if (auto *definingOp = character.getDefiningOp()) {
145       if (auto box = dyn_cast<fir::EmboxCharOp>(definingOp)) {
146         base = box.getMemref();
147         boxCharLen = box.getLen();
148       }
149     }
150     if (!boxCharLen) {
151       auto unboxed =
152           builder.create<fir::UnboxCharOp>(loc, refType, lenType, character);
153       base = builder.createConvert(loc, refType, unboxed.getResult(0));
154       boxCharLen = unboxed.getResult(1);
155     }
156     if (!resultLen) {
157       resultLen = boxCharLen;
158     }
159   } else if (type.isa<fir::BoxType>()) {
160     mlir::emitError(loc, "descriptor or derived type not yet handled");
161   } else {
162     llvm_unreachable("Cannot translate mlir::Value to character ExtendedValue");
163   }
164 
165   if (!base) {
166     if (auto load =
167             mlir::dyn_cast_or_null<fir::LoadOp>(character.getDefiningOp())) {
168       base = load.getOperand();
169     } else {
170       return materializeValue(fir::getBase(character));
171     }
172   }
173   if (!resultLen)
174     llvm::report_fatal_error("no dynamic length found for character");
175   if (!extents.empty())
176     return fir::CharArrayBoxValue{base, resultLen, extents};
177   return fir::CharBoxValue{base, resultLen};
178 }
179 
180 static mlir::Type getSingletonCharType(mlir::MLIRContext *ctxt, int kind) {
181   return fir::CharacterType::getSingleton(ctxt, kind);
182 }
183 
184 mlir::Value
185 fir::factory::CharacterExprHelper::createEmbox(const fir::CharBoxValue &box) {
186   // Base CharBoxValue of CharArrayBoxValue are ok here (do not require a scalar
187   // type)
188   auto charTy = recoverCharacterType(box.getBuffer().getType());
189   auto boxCharType =
190       fir::BoxCharType::get(builder.getContext(), charTy.getFKind());
191   auto refType = fir::ReferenceType::get(boxCharType.getEleTy());
192   mlir::Value buff = box.getBuffer();
193   // fir.boxchar requires a memory reference. Allocate temp if the character is
194   // not in memory.
195   if (!fir::isa_ref_type(buff.getType())) {
196     auto temp = builder.createTemporary(loc, buff.getType());
197     builder.create<fir::StoreOp>(loc, buff, temp);
198     buff = temp;
199   }
200   buff = builder.createConvert(loc, refType, buff);
201   // Convert in case the provided length is not of the integer type that must
202   // be used in boxchar.
203   auto len = builder.createConvert(loc, builder.getCharacterLengthType(),
204                                    box.getLen());
205   return builder.create<fir::EmboxCharOp>(loc, boxCharType, buff, len);
206 }
207 
208 fir::CharBoxValue fir::factory::CharacterExprHelper::toScalarCharacter(
209     const fir::CharArrayBoxValue &box) {
210   if (box.getBuffer().getType().isa<fir::PointerType>())
211     TODO(loc, "concatenating non contiguous character array into a scalar");
212 
213   // TODO: add a fast path multiplying new length at compile time if the info is
214   // in the array type.
215   auto lenType = builder.getCharacterLengthType();
216   auto len = builder.createConvert(loc, lenType, box.getLen());
217   for (auto extent : box.getExtents())
218     len = builder.create<arith::MulIOp>(
219         loc, len, builder.createConvert(loc, lenType, extent));
220 
221   // TODO: typeLen can be improved in compiled constant cases
222   // TODO: allow bare fir.array<> (no ref) conversion here ?
223   auto typeLen = fir::CharacterType::unknownLen();
224   auto kind = recoverCharacterType(box.getBuffer().getType()).getFKind();
225   auto charTy = fir::CharacterType::get(builder.getContext(), kind, typeLen);
226   auto type = fir::ReferenceType::get(charTy);
227   auto buffer = builder.createConvert(loc, type, box.getBuffer());
228   return {buffer, len};
229 }
230 
231 mlir::Value fir::factory::CharacterExprHelper::createEmbox(
232     const fir::CharArrayBoxValue &box) {
233   // Use same embox as for scalar. It's losing the actual data size information
234   // (We do not multiply the length by the array size), but that is what Fortran
235   // call interfaces using boxchar expect.
236   return createEmbox(static_cast<const fir::CharBoxValue &>(box));
237 }
238 
239 /// Get the address of the element at position \p index of the scalar character
240 /// \p buffer.
241 /// \p buffer must be of type !fir.ref<fir.char<k, len>>. The length may be
242 /// unknown. \p index must have any integer type, and is zero based. The return
243 /// value is a singleton address (!fir.ref<!fir.char<kind>>)
244 mlir::Value
245 fir::factory::CharacterExprHelper::createElementAddr(mlir::Value buffer,
246                                                      mlir::Value index) {
247   // The only way to address an element of a fir.ref<char<kind, len>> is to cast
248   // it to a fir.array<len x fir.char<kind>> and use fir.coordinate_of.
249   auto bufferType = buffer.getType();
250   assert(fir::isa_ref_type(bufferType));
251   assert(isCharacterScalar(bufferType));
252   auto charTy = recoverCharacterType(bufferType);
253   auto singleTy = getSingletonCharType(builder.getContext(), charTy.getFKind());
254   auto singleRefTy = builder.getRefType(singleTy);
255   auto extent = fir::SequenceType::getUnknownExtent();
256   if (charTy.getLen() != fir::CharacterType::unknownLen())
257     extent = charTy.getLen();
258   auto coorTy = builder.getRefType(fir::SequenceType::get({extent}, singleTy));
259 
260   auto coor = builder.createConvert(loc, coorTy, buffer);
261   auto i = builder.createConvert(loc, builder.getIndexType(), index);
262   return builder.create<fir::CoordinateOp>(loc, singleRefTy, coor, i);
263 }
264 
265 /// Load a character out of `buff` from offset `index`.
266 /// `buff` must be a reference to memory.
267 mlir::Value
268 fir::factory::CharacterExprHelper::createLoadCharAt(mlir::Value buff,
269                                                     mlir::Value index) {
270   LLVM_DEBUG(llvm::dbgs() << "load a char: " << buff << " type: "
271                           << buff.getType() << " at: " << index << '\n');
272   return builder.create<fir::LoadOp>(loc, createElementAddr(buff, index));
273 }
274 
275 /// Store the singleton character `c` to `str` at offset `index`.
276 /// `str` must be a reference to memory.
277 void fir::factory::CharacterExprHelper::createStoreCharAt(mlir::Value str,
278                                                           mlir::Value index,
279                                                           mlir::Value c) {
280   LLVM_DEBUG(llvm::dbgs() << "store the char: " << c << " into: " << str
281                           << " type: " << str.getType() << " at: " << index
282                           << '\n');
283   auto addr = createElementAddr(str, index);
284   builder.create<fir::StoreOp>(loc, c, addr);
285 }
286 
287 // FIXME: this temp is useless... either fir.coordinate_of needs to
288 // work on "loaded" characters (!fir.array<len x fir.char<kind>>) or
289 // character should never be loaded.
290 // If this is a fir.array<>, allocate and store the value so that
291 // fir.cooridnate_of can be use on the value.
292 mlir::Value fir::factory::CharacterExprHelper::getCharBoxBuffer(
293     const fir::CharBoxValue &box) {
294   auto buff = box.getBuffer();
295   if (fir::isa_char(buff.getType())) {
296     auto newBuff = builder.create<fir::AllocaOp>(loc, buff.getType());
297     builder.create<fir::StoreOp>(loc, buff, newBuff);
298     return newBuff;
299   }
300   return buff;
301 }
302 
303 /// Get the LLVM intrinsic for `memcpy`. Use the 64 bit version.
304 mlir::FuncOp fir::factory::getLlvmMemcpy(fir::FirOpBuilder &builder) {
305   auto ptrTy = builder.getRefType(builder.getIntegerType(8));
306   llvm::SmallVector<mlir::Type> args = {ptrTy, ptrTy, builder.getI64Type(),
307                                         builder.getI1Type()};
308   auto memcpyTy =
309       mlir::FunctionType::get(builder.getContext(), args, llvm::None);
310   return builder.addNamedFunction(builder.getUnknownLoc(),
311                                   "llvm.memcpy.p0i8.p0i8.i64", memcpyTy);
312 }
313 
314 /// Get the LLVM intrinsic for `memmove`. Use the 64 bit version.
315 mlir::FuncOp fir::factory::getLlvmMemmove(fir::FirOpBuilder &builder) {
316   auto ptrTy = builder.getRefType(builder.getIntegerType(8));
317   llvm::SmallVector<mlir::Type> args = {ptrTy, ptrTy, builder.getI64Type(),
318                                         builder.getI1Type()};
319   auto memmoveTy =
320       mlir::FunctionType::get(builder.getContext(), args, llvm::None);
321   return builder.addNamedFunction(builder.getUnknownLoc(),
322                                   "llvm.memmove.p0i8.p0i8.i64", memmoveTy);
323 }
324 
325 /// Get the LLVM intrinsic for `memset`. Use the 64 bit version.
326 mlir::FuncOp fir::factory::getLlvmMemset(fir::FirOpBuilder &builder) {
327   auto ptrTy = builder.getRefType(builder.getIntegerType(8));
328   llvm::SmallVector<mlir::Type> args = {ptrTy, ptrTy, builder.getI64Type(),
329                                         builder.getI1Type()};
330   auto memsetTy =
331       mlir::FunctionType::get(builder.getContext(), args, llvm::None);
332   return builder.addNamedFunction(builder.getUnknownLoc(),
333                                   "llvm.memset.p0i8.p0i8.i64", memsetTy);
334 }
335 
336 /// Get the standard `realloc` function.
337 mlir::FuncOp fir::factory::getRealloc(fir::FirOpBuilder &builder) {
338   auto ptrTy = builder.getRefType(builder.getIntegerType(8));
339   llvm::SmallVector<mlir::Type> args = {ptrTy, builder.getI64Type()};
340   auto reallocTy = mlir::FunctionType::get(builder.getContext(), args, {ptrTy});
341   return builder.addNamedFunction(builder.getUnknownLoc(), "realloc",
342                                   reallocTy);
343 }
344 
345 /// Create a loop to copy `count` characters from `src` to `dest`. Note that the
346 /// KIND indicates the number of bits in a code point. (ASCII, UCS-2, or UCS-4.)
347 void fir::factory::CharacterExprHelper::createCopy(
348     const fir::CharBoxValue &dest, const fir::CharBoxValue &src,
349     mlir::Value count) {
350   auto fromBuff = getCharBoxBuffer(src);
351   auto toBuff = getCharBoxBuffer(dest);
352   LLVM_DEBUG(llvm::dbgs() << "create char copy from: "; src.dump();
353              llvm::dbgs() << " to: "; dest.dump();
354              llvm::dbgs() << " count: " << count << '\n');
355   auto kind = getCharacterKind(src.getBuffer().getType());
356   // If the src and dest are the same KIND, then use memmove to move the bits.
357   // We don't have to worry about overlapping ranges with memmove.
358   if (getCharacterKind(dest.getBuffer().getType()) == kind) {
359     auto bytes = builder.getKindMap().getCharacterBitsize(kind) / 8;
360     auto i64Ty = builder.getI64Type();
361     auto kindBytes = builder.createIntegerConstant(loc, i64Ty, bytes);
362     auto castCount = builder.createConvert(loc, i64Ty, count);
363     auto totalBytes = builder.create<arith::MulIOp>(loc, kindBytes, castCount);
364     auto notVolatile = builder.createBool(loc, false);
365     auto memmv = getLlvmMemmove(builder);
366     auto argTys = memmv.getType().getInputs();
367     auto toPtr = builder.createConvert(loc, argTys[0], toBuff);
368     auto fromPtr = builder.createConvert(loc, argTys[1], fromBuff);
369     builder.create<fir::CallOp>(
370         loc, memmv, mlir::ValueRange{toPtr, fromPtr, totalBytes, notVolatile});
371     return;
372   }
373 
374   // Convert a CHARACTER of one KIND into a CHARACTER of another KIND.
375   builder.create<fir::CharConvertOp>(loc, src.getBuffer(), count,
376                                      dest.getBuffer());
377 }
378 
379 void fir::factory::CharacterExprHelper::createPadding(
380     const fir::CharBoxValue &str, mlir::Value lower, mlir::Value upper) {
381   auto blank = createBlankConstant(getCharacterType(str));
382   // Always create the loop, if upper < lower, no iteration will be
383   // executed.
384   auto toBuff = getCharBoxBuffer(str);
385   fir::factory::DoLoopHelper{builder, loc}.createLoop(
386       lower, upper, [&](fir::FirOpBuilder &, mlir::Value index) {
387         createStoreCharAt(toBuff, index, blank);
388       });
389 }
390 
391 fir::CharBoxValue
392 fir::factory::CharacterExprHelper::createCharacterTemp(mlir::Type type,
393                                                        mlir::Value len) {
394   auto kind = recoverCharacterType(type).getFKind();
395   auto typeLen = fir::CharacterType::unknownLen();
396   // If len is a constant, reflect the length in the type.
397   if (auto cstLen = getIntIfConstant(len))
398     typeLen = *cstLen;
399   auto *ctxt = builder.getContext();
400   auto charTy = fir::CharacterType::get(ctxt, kind, typeLen);
401   llvm::SmallVector<mlir::Value> lenParams;
402   if (typeLen == fir::CharacterType::unknownLen())
403     lenParams.push_back(len);
404   auto ref = builder.allocateLocal(loc, charTy, "", ".chrtmp",
405                                    /*shape=*/llvm::None, lenParams);
406   return {ref, len};
407 }
408 
409 fir::CharBoxValue fir::factory::CharacterExprHelper::createTempFrom(
410     const fir::ExtendedValue &source) {
411   const auto *charBox = source.getCharBox();
412   if (!charBox)
413     fir::emitFatalError(loc, "source must be a fir::CharBoxValue");
414   auto len = charBox->getLen();
415   auto sourceTy = charBox->getBuffer().getType();
416   auto temp = createCharacterTemp(sourceTy, len);
417   if (fir::isa_ref_type(sourceTy)) {
418     createCopy(temp, *charBox, len);
419   } else {
420     auto ref = builder.createConvert(loc, builder.getRefType(sourceTy),
421                                      temp.getBuffer());
422     builder.create<fir::StoreOp>(loc, charBox->getBuffer(), ref);
423   }
424   return temp;
425 }
426 
427 // Simple length one character assignment without loops.
428 void fir::factory::CharacterExprHelper::createLengthOneAssign(
429     const fir::CharBoxValue &lhs, const fir::CharBoxValue &rhs) {
430   auto addr = lhs.getBuffer();
431   mlir::Value val = builder.create<fir::LoadOp>(loc, rhs.getBuffer());
432   auto addrTy = builder.getRefType(val.getType());
433   addr = builder.createConvert(loc, addrTy, addr);
434   builder.create<fir::StoreOp>(loc, val, addr);
435 }
436 
437 /// Returns the minimum of integer mlir::Value \p a and \b.
438 mlir::Value genMin(fir::FirOpBuilder &builder, mlir::Location loc,
439                    mlir::Value a, mlir::Value b) {
440   auto cmp =
441       builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::slt, a, b);
442   return builder.create<mlir::arith::SelectOp>(loc, cmp, a, b);
443 }
444 
445 void fir::factory::CharacterExprHelper::createAssign(
446     const fir::CharBoxValue &lhs, const fir::CharBoxValue &rhs) {
447   auto rhsCstLen = getCompileTimeLength(rhs);
448   auto lhsCstLen = getCompileTimeLength(lhs);
449   bool compileTimeSameLength =
450       lhsCstLen && rhsCstLen && *lhsCstLen == *rhsCstLen;
451 
452   if (compileTimeSameLength && *lhsCstLen == 1) {
453     createLengthOneAssign(lhs, rhs);
454     return;
455   }
456 
457   // Copy the minimum of the lhs and rhs lengths and pad the lhs remainder
458   // if needed.
459   auto copyCount = lhs.getLen();
460   auto idxTy = builder.getIndexType();
461   if (!compileTimeSameLength) {
462     auto lhsLen = builder.createConvert(loc, idxTy, lhs.getLen());
463     auto rhsLen = builder.createConvert(loc, idxTy, rhs.getLen());
464     copyCount = genMin(builder, loc, lhsLen, rhsLen);
465   }
466 
467   // Actual copy
468   createCopy(lhs, rhs, copyCount);
469 
470   // Pad if needed.
471   if (!compileTimeSameLength) {
472     auto one = builder.createIntegerConstant(loc, lhs.getLen().getType(), 1);
473     auto maxPadding = builder.create<arith::SubIOp>(loc, lhs.getLen(), one);
474     createPadding(lhs, copyCount, maxPadding);
475   }
476 }
477 
478 fir::CharBoxValue fir::factory::CharacterExprHelper::createConcatenate(
479     const fir::CharBoxValue &lhs, const fir::CharBoxValue &rhs) {
480   auto lhsLen = builder.createConvert(loc, builder.getCharacterLengthType(),
481                                       lhs.getLen());
482   auto rhsLen = builder.createConvert(loc, builder.getCharacterLengthType(),
483                                       rhs.getLen());
484   mlir::Value len = builder.create<arith::AddIOp>(loc, lhsLen, rhsLen);
485   auto temp = createCharacterTemp(getCharacterType(rhs), len);
486   createCopy(temp, lhs, lhsLen);
487   auto one = builder.createIntegerConstant(loc, len.getType(), 1);
488   auto upperBound = builder.create<arith::SubIOp>(loc, len, one);
489   auto lhsLenIdx = builder.createConvert(loc, builder.getIndexType(), lhsLen);
490   auto fromBuff = getCharBoxBuffer(rhs);
491   auto toBuff = getCharBoxBuffer(temp);
492   fir::factory::DoLoopHelper{builder, loc}.createLoop(
493       lhsLenIdx, upperBound, one,
494       [&](fir::FirOpBuilder &bldr, mlir::Value index) {
495         auto rhsIndex = bldr.create<arith::SubIOp>(loc, index, lhsLenIdx);
496         auto charVal = createLoadCharAt(fromBuff, rhsIndex);
497         createStoreCharAt(toBuff, index, charVal);
498       });
499   return temp;
500 }
501 
502 fir::CharBoxValue fir::factory::CharacterExprHelper::createSubstring(
503     const fir::CharBoxValue &box, llvm::ArrayRef<mlir::Value> bounds) {
504   // Constant need to be materialize in memory to use fir.coordinate_of.
505   auto nbounds = bounds.size();
506   if (nbounds < 1 || nbounds > 2) {
507     mlir::emitError(loc, "Incorrect number of bounds in substring");
508     return {mlir::Value{}, mlir::Value{}};
509   }
510   mlir::SmallVector<mlir::Value> castBounds;
511   // Convert bounds to length type to do safe arithmetic on it.
512   for (auto bound : bounds)
513     castBounds.push_back(
514         builder.createConvert(loc, builder.getCharacterLengthType(), bound));
515   auto lowerBound = castBounds[0];
516   // FIR CoordinateOp is zero based but Fortran substring are one based.
517   auto one = builder.createIntegerConstant(loc, lowerBound.getType(), 1);
518   auto offset = builder.create<arith::SubIOp>(loc, lowerBound, one).getResult();
519   auto addr = createElementAddr(box.getBuffer(), offset);
520   auto kind = getCharacterKind(box.getBuffer().getType());
521   auto charTy = fir::CharacterType::getUnknownLen(builder.getContext(), kind);
522   auto resultType = builder.getRefType(charTy);
523   auto substringRef = builder.createConvert(loc, resultType, addr);
524 
525   // Compute the length.
526   mlir::Value substringLen;
527   if (nbounds < 2) {
528     substringLen =
529         builder.create<arith::SubIOp>(loc, box.getLen(), castBounds[0]);
530   } else {
531     substringLen =
532         builder.create<arith::SubIOp>(loc, castBounds[1], castBounds[0]);
533   }
534   substringLen = builder.create<arith::AddIOp>(loc, substringLen, one);
535 
536   // Set length to zero if bounds were reversed (Fortran 2018 9.4.1)
537   auto zero = builder.createIntegerConstant(loc, substringLen.getType(), 0);
538   auto cdt = builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::slt,
539                                            substringLen, zero);
540   substringLen =
541       builder.create<mlir::arith::SelectOp>(loc, cdt, zero, substringLen);
542 
543   return {substringRef, substringLen};
544 }
545 
546 mlir::Value
547 fir::factory::CharacterExprHelper::createLenTrim(const fir::CharBoxValue &str) {
548   // Note: Runtime for LEN_TRIM should also be available at some
549   // point. For now use an inlined implementation.
550   auto indexType = builder.getIndexType();
551   auto len = builder.createConvert(loc, indexType, str.getLen());
552   auto one = builder.createIntegerConstant(loc, indexType, 1);
553   auto minusOne = builder.createIntegerConstant(loc, indexType, -1);
554   auto zero = builder.createIntegerConstant(loc, indexType, 0);
555   auto trueVal = builder.createIntegerConstant(loc, builder.getI1Type(), 1);
556   auto blank = createBlankConstantCode(getCharacterType(str));
557   mlir::Value lastChar = builder.create<arith::SubIOp>(loc, len, one);
558 
559   auto iterWhile =
560       builder.create<fir::IterWhileOp>(loc, lastChar, zero, minusOne, trueVal,
561                                        /*returnFinalCount=*/false, lastChar);
562   auto insPt = builder.saveInsertionPoint();
563   builder.setInsertionPointToStart(iterWhile.getBody());
564   auto index = iterWhile.getInductionVar();
565   // Look for first non-blank from the right of the character.
566   auto fromBuff = getCharBoxBuffer(str);
567   auto elemAddr = createElementAddr(fromBuff, index);
568   auto codeAddr =
569       builder.createConvert(loc, builder.getRefType(blank.getType()), elemAddr);
570   auto c = builder.create<fir::LoadOp>(loc, codeAddr);
571   auto isBlank =
572       builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::eq, blank, c);
573   llvm::SmallVector<mlir::Value> results = {isBlank, index};
574   builder.create<fir::ResultOp>(loc, results);
575   builder.restoreInsertionPoint(insPt);
576   // Compute length after iteration (zero if all blanks)
577   mlir::Value newLen =
578       builder.create<arith::AddIOp>(loc, iterWhile.getResult(1), one);
579   auto result = builder.create<mlir::arith::SelectOp>(
580       loc, iterWhile.getResult(0), zero, newLen);
581   return builder.createConvert(loc, builder.getCharacterLengthType(), result);
582 }
583 
584 fir::CharBoxValue
585 fir::factory::CharacterExprHelper::createCharacterTemp(mlir::Type type,
586                                                        int len) {
587   assert(len >= 0 && "expected positive length");
588   auto kind = recoverCharacterType(type).getFKind();
589   auto charType = fir::CharacterType::get(builder.getContext(), kind, len);
590   auto addr = builder.create<fir::AllocaOp>(loc, charType);
591   auto mlirLen =
592       builder.createIntegerConstant(loc, builder.getCharacterLengthType(), len);
593   return {addr, mlirLen};
594 }
595 
596 // Returns integer with code for blank. The integer has the same
597 // size as the character. Blank has ascii space code for all kinds.
598 mlir::Value fir::factory::CharacterExprHelper::createBlankConstantCode(
599     fir::CharacterType type) {
600   auto bits = builder.getKindMap().getCharacterBitsize(type.getFKind());
601   auto intType = builder.getIntegerType(bits);
602   return builder.createIntegerConstant(loc, intType, ' ');
603 }
604 
605 mlir::Value fir::factory::CharacterExprHelper::createBlankConstant(
606     fir::CharacterType type) {
607   return createSingletonFromCode(createBlankConstantCode(type),
608                                  type.getFKind());
609 }
610 
611 void fir::factory::CharacterExprHelper::createAssign(
612     const fir::ExtendedValue &lhs, const fir::ExtendedValue &rhs) {
613   if (auto *str = rhs.getBoxOf<fir::CharBoxValue>()) {
614     if (auto *to = lhs.getBoxOf<fir::CharBoxValue>()) {
615       createAssign(*to, *str);
616       return;
617     }
618   }
619   TODO(loc, "character array assignment");
620   // Note that it is not sure the array aspect should be handled
621   // by this utility.
622 }
623 
624 mlir::Value
625 fir::factory::CharacterExprHelper::createEmboxChar(mlir::Value addr,
626                                                    mlir::Value len) {
627   return createEmbox(fir::CharBoxValue{addr, len});
628 }
629 
630 std::pair<mlir::Value, mlir::Value>
631 fir::factory::CharacterExprHelper::createUnboxChar(mlir::Value boxChar) {
632   using T = std::pair<mlir::Value, mlir::Value>;
633   return toExtendedValue(boxChar).match(
634       [](const fir::CharBoxValue &b) -> T {
635         return {b.getBuffer(), b.getLen()};
636       },
637       [](const fir::CharArrayBoxValue &b) -> T {
638         return {b.getBuffer(), b.getLen()};
639       },
640       [](const auto &) -> T { llvm::report_fatal_error("not a character"); });
641 }
642 
643 bool fir::factory::CharacterExprHelper::isCharacterLiteral(mlir::Type type) {
644   if (auto seqType = type.dyn_cast<fir::SequenceType>())
645     return (seqType.getShape().size() == 1) &&
646            fir::isa_char(seqType.getEleTy());
647   return false;
648 }
649 
650 bool fir::factory::CharacterExprHelper::isCharacterScalar(mlir::Type type) {
651   if (type.isa<fir::BoxCharType>())
652     return true;
653   type = fir::unwrapRefType(type);
654   if (auto boxTy = type.dyn_cast<fir::BoxType>())
655     type = boxTy.getEleTy();
656   type = fir::unwrapRefType(type);
657   return !type.isa<fir::SequenceType>() && fir::isa_char(type);
658 }
659 
660 fir::KindTy
661 fir::factory::CharacterExprHelper::getCharacterKind(mlir::Type type) {
662   assert(isCharacterScalar(type) && "expected scalar character");
663   return recoverCharacterType(type).getFKind();
664 }
665 
666 fir::KindTy
667 fir::factory::CharacterExprHelper::getCharacterOrSequenceKind(mlir::Type type) {
668   return recoverCharacterType(type).getFKind();
669 }
670 
671 bool fir::factory::CharacterExprHelper::isArray(mlir::Type type) {
672   return !isCharacterScalar(type);
673 }
674 
675 bool fir::factory::CharacterExprHelper::hasConstantLengthInType(
676     const fir::ExtendedValue &exv) {
677   auto charTy = recoverCharacterType(fir::getBase(exv).getType());
678   return charTy.hasConstantLen();
679 }
680 
681 mlir::Value
682 fir::factory::CharacterExprHelper::createSingletonFromCode(mlir::Value code,
683                                                            int kind) {
684   auto charType = fir::CharacterType::get(builder.getContext(), kind, 1);
685   auto bits = builder.getKindMap().getCharacterBitsize(kind);
686   auto intType = builder.getIntegerType(bits);
687   auto cast = builder.createConvert(loc, intType, code);
688   auto undef = builder.create<fir::UndefOp>(loc, charType);
689   auto zero = builder.getIntegerAttr(builder.getIndexType(), 0);
690   return builder.create<fir::InsertValueOp>(loc, charType, undef, cast,
691                                             builder.getArrayAttr(zero));
692 }
693 
694 mlir::Value fir::factory::CharacterExprHelper::extractCodeFromSingleton(
695     mlir::Value singleton) {
696   auto type = getCharacterType(singleton);
697   assert(type.getLen() == 1);
698   auto bits = builder.getKindMap().getCharacterBitsize(type.getFKind());
699   auto intType = builder.getIntegerType(bits);
700   auto zero = builder.getIntegerAttr(builder.getIndexType(), 0);
701   return builder.create<fir::ExtractValueOp>(loc, intType, singleton,
702                                              builder.getArrayAttr(zero));
703 }
704 
705 mlir::Value
706 fir::factory::CharacterExprHelper::readLengthFromBox(mlir::Value box) {
707   auto lenTy = builder.getCharacterLengthType();
708   auto size = builder.create<fir::BoxEleSizeOp>(loc, lenTy, box);
709   auto charTy = recoverCharacterType(box.getType());
710   auto bits = builder.getKindMap().getCharacterBitsize(charTy.getFKind());
711   auto width = bits / 8;
712   if (width > 1) {
713     auto widthVal = builder.createIntegerConstant(loc, lenTy, width);
714     return builder.create<arith::DivSIOp>(loc, size, widthVal);
715   }
716   return size;
717 }
718 
719 mlir::Value fir::factory::CharacterExprHelper::getLength(mlir::Value memref) {
720   auto memrefType = memref.getType();
721   auto charType = recoverCharacterType(memrefType);
722   assert(charType && "must be a character type");
723   if (charType.hasConstantLen())
724     return builder.createIntegerConstant(loc, builder.getCharacterLengthType(),
725                                          charType.getLen());
726   if (memrefType.isa<fir::BoxType>())
727     return readLengthFromBox(memref);
728   if (memrefType.isa<fir::BoxCharType>())
729     return createUnboxChar(memref).second;
730 
731   // Length cannot be deduced from memref.
732   return {};
733 }
734 
735 std::pair<mlir::Value, mlir::Value>
736 fir::factory::extractCharacterProcedureTuple(fir::FirOpBuilder &builder,
737                                              mlir::Location loc,
738                                              mlir::Value tuple) {
739   mlir::TupleType tupleType = tuple.getType().cast<mlir::TupleType>();
740   mlir::Value addr = builder.create<fir::ExtractValueOp>(
741       loc, tupleType.getType(0), tuple,
742       builder.getArrayAttr(
743           {builder.getIntegerAttr(builder.getIndexType(), 0)}));
744   mlir::Value len = builder.create<fir::ExtractValueOp>(
745       loc, tupleType.getType(1), tuple,
746       builder.getArrayAttr(
747           {builder.getIntegerAttr(builder.getIndexType(), 1)}));
748   return {addr, len};
749 }
750 
751 mlir::Value fir::factory::createCharacterProcedureTuple(
752     fir::FirOpBuilder &builder, mlir::Location loc, mlir::Type argTy,
753     mlir::Value addr, mlir::Value len) {
754   mlir::TupleType tupleType = argTy.cast<mlir::TupleType>();
755   addr = builder.createConvert(loc, tupleType.getType(0), addr);
756   len = builder.createConvert(loc, tupleType.getType(1), len);
757   mlir::Value tuple = builder.create<fir::UndefOp>(loc, tupleType);
758   tuple = builder.create<fir::InsertValueOp>(
759       loc, tupleType, tuple, addr,
760       builder.getArrayAttr(
761           {builder.getIntegerAttr(builder.getIndexType(), 0)}));
762   tuple = builder.create<fir::InsertValueOp>(
763       loc, tupleType, tuple, len,
764       builder.getArrayAttr(
765           {builder.getIntegerAttr(builder.getIndexType(), 1)}));
766   return tuple;
767 }
768 
769 bool fir::factory::isCharacterProcedureTuple(mlir::Type ty) {
770   mlir::TupleType tuple = ty.dyn_cast<mlir::TupleType>();
771   return tuple && tuple.size() == 2 &&
772          tuple.getType(0).isa<mlir::FunctionType>() &&
773          fir::isa_integer(tuple.getType(1));
774 }
775 
776 mlir::Type
777 fir::factory::getCharacterProcedureTupleType(mlir::Type funcPointerType) {
778   mlir::MLIRContext *context = funcPointerType.getContext();
779   mlir::Type lenType = mlir::IntegerType::get(context, 64);
780   return mlir::TupleType::get(context, {funcPointerType, lenType});
781 }
782