1 //===-- Character.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "flang/Optimizer/Builder/Character.h"
14 #include "flang/Lower/Todo.h"
15 #include "flang/Optimizer/Builder/DoLoopHelper.h"
16 #include "llvm/Support/Debug.h"
17 #include <optional>
18 
19 #define DEBUG_TYPE "flang-lower-character"
20 
21 //===----------------------------------------------------------------------===//
22 // CharacterExprHelper implementation
23 //===----------------------------------------------------------------------===//
24 
25 /// Unwrap base fir.char<kind,len> type.
26 static fir::CharacterType recoverCharacterType(mlir::Type type) {
27   if (auto boxType = type.dyn_cast<fir::BoxCharType>())
28     return boxType.getEleTy();
29   while (true) {
30     type = fir::unwrapRefType(type);
31     if (auto boxTy = type.dyn_cast<fir::BoxType>())
32       type = boxTy.getEleTy();
33     else
34       break;
35   }
36   return fir::unwrapSequenceType(type).cast<fir::CharacterType>();
37 }
38 
39 /// Get fir.char<kind> type with the same kind as inside str.
40 fir::CharacterType
41 fir::factory::CharacterExprHelper::getCharacterType(mlir::Type type) {
42   assert(isCharacterScalar(type) && "expected scalar character");
43   return recoverCharacterType(type);
44 }
45 
46 fir::CharacterType fir::factory::CharacterExprHelper::getCharacterType(
47     const fir::CharBoxValue &box) {
48   return getCharacterType(box.getBuffer().getType());
49 }
50 
51 fir::CharacterType
52 fir::factory::CharacterExprHelper::getCharacterType(mlir::Value str) {
53   return getCharacterType(str.getType());
54 }
55 
56 /// Determine the static size of the character. Returns the computed size, not
57 /// an IR Value.
58 static std::optional<fir::CharacterType::LenType>
59 getCompileTimeLength(const fir::CharBoxValue &box) {
60   auto len = recoverCharacterType(box.getBuffer().getType()).getLen();
61   if (len == fir::CharacterType::unknownLen())
62     return {};
63   return len;
64 }
65 
66 /// Detect the precondition that the value `str` does not reside in memory. Such
67 /// values will have a type `!fir.array<...x!fir.char<N>>` or `!fir.char<N>`.
68 LLVM_ATTRIBUTE_UNUSED static bool needToMaterialize(mlir::Value str) {
69   return str.getType().isa<fir::SequenceType>() || fir::isa_char(str.getType());
70 }
71 
72 /// Unwrap integer constant from mlir::Value.
73 static llvm::Optional<std::int64_t> getIntIfConstant(mlir::Value value) {
74   if (auto *definingOp = value.getDefiningOp())
75     if (auto cst = mlir::dyn_cast<mlir::ConstantOp>(definingOp))
76       if (auto intAttr = cst.getValue().dyn_cast<mlir::IntegerAttr>())
77         return intAttr.getInt();
78   return {};
79 }
80 
81 /// This is called only if `str` does not reside in memory. Such a bare string
82 /// value will be converted into a memory-based temporary and an extended
83 /// boxchar value returned.
84 fir::CharBoxValue
85 fir::factory::CharacterExprHelper::materializeValue(mlir::Value str) {
86   assert(needToMaterialize(str));
87   auto ty = str.getType();
88   assert(isCharacterScalar(ty) && "expected scalar character");
89   auto charTy = ty.dyn_cast<fir::CharacterType>();
90   if (!charTy || charTy.getLen() == fir::CharacterType::unknownLen()) {
91     LLVM_DEBUG(llvm::dbgs() << "cannot materialize: " << str << '\n');
92     llvm_unreachable("must be a !fir.char<N> type");
93   }
94   auto len = builder.createIntegerConstant(
95       loc, builder.getCharacterLengthType(), charTy.getLen());
96   auto temp = builder.create<fir::AllocaOp>(loc, charTy);
97   builder.create<fir::StoreOp>(loc, str, temp);
98   LLVM_DEBUG(llvm::dbgs() << "materialized as local: " << str << " -> (" << temp
99                           << ", " << len << ")\n");
100   return {temp, len};
101 }
102 
103 fir::ExtendedValue
104 fir::factory::CharacterExprHelper::toExtendedValue(mlir::Value character,
105                                                    mlir::Value len) {
106   auto lenType = builder.getCharacterLengthType();
107   auto type = character.getType();
108   auto base = fir::isa_passbyref_type(type) ? character : mlir::Value{};
109   auto resultLen = len;
110   llvm::SmallVector<mlir::Value> extents;
111 
112   if (auto eleType = fir::dyn_cast_ptrEleTy(type))
113     type = eleType;
114 
115   if (auto arrayType = type.dyn_cast<fir::SequenceType>()) {
116     type = arrayType.getEleTy();
117     auto indexType = builder.getIndexType();
118     for (auto extent : arrayType.getShape()) {
119       if (extent == fir::SequenceType::getUnknownExtent())
120         break;
121       extents.emplace_back(
122           builder.createIntegerConstant(loc, indexType, extent));
123     }
124     // Last extent might be missing in case of assumed-size. If more extents
125     // could not be deduced from type, that's an error (a fir.box should
126     // have been used in the interface).
127     if (extents.size() + 1 < arrayType.getShape().size())
128       mlir::emitError(loc, "cannot retrieve array extents from type");
129   }
130 
131   if (auto charTy = type.dyn_cast<fir::CharacterType>()) {
132     if (!resultLen && charTy.getLen() != fir::CharacterType::unknownLen())
133       resultLen = builder.createIntegerConstant(loc, lenType, charTy.getLen());
134   } else if (auto boxCharType = type.dyn_cast<fir::BoxCharType>()) {
135     auto refType = builder.getRefType(boxCharType.getEleTy());
136     // If the embox is accessible, use its operand to avoid filling
137     // the generated fir with embox/unbox.
138     mlir::Value boxCharLen;
139     if (auto *definingOp = character.getDefiningOp()) {
140       if (auto box = dyn_cast<fir::EmboxCharOp>(definingOp)) {
141         base = box.memref();
142         boxCharLen = box.len();
143       }
144     }
145     if (!boxCharLen) {
146       auto unboxed =
147           builder.create<fir::UnboxCharOp>(loc, refType, lenType, character);
148       base = builder.createConvert(loc, refType, unboxed.getResult(0));
149       boxCharLen = unboxed.getResult(1);
150     }
151     if (!resultLen) {
152       resultLen = boxCharLen;
153     }
154   } else if (type.isa<fir::BoxType>()) {
155     mlir::emitError(loc, "descriptor or derived type not yet handled");
156   } else {
157     llvm_unreachable("Cannot translate mlir::Value to character ExtendedValue");
158   }
159 
160   if (!base) {
161     if (auto load =
162             mlir::dyn_cast_or_null<fir::LoadOp>(character.getDefiningOp())) {
163       base = load.getOperand();
164     } else {
165       return materializeValue(fir::getBase(character));
166     }
167   }
168   if (!resultLen)
169     llvm::report_fatal_error("no dynamic length found for character");
170   if (!extents.empty())
171     return fir::CharArrayBoxValue{base, resultLen, extents};
172   return fir::CharBoxValue{base, resultLen};
173 }
174 
175 static mlir::Type getSingletonCharType(mlir::MLIRContext *ctxt, int kind) {
176   return fir::CharacterType::getSingleton(ctxt, kind);
177 }
178 
179 mlir::Value
180 fir::factory::CharacterExprHelper::createEmbox(const fir::CharBoxValue &box) {
181   // Base CharBoxValue of CharArrayBoxValue are ok here (do not require a scalar
182   // type)
183   auto charTy = recoverCharacterType(box.getBuffer().getType());
184   auto boxCharType =
185       fir::BoxCharType::get(builder.getContext(), charTy.getFKind());
186   auto refType = fir::ReferenceType::get(boxCharType.getEleTy());
187   mlir::Value buff = box.getBuffer();
188   // fir.boxchar requires a memory reference. Allocate temp if the character is
189   // not in memory.
190   if (!fir::isa_ref_type(buff.getType())) {
191     auto temp = builder.createTemporary(loc, buff.getType());
192     builder.create<fir::StoreOp>(loc, buff, temp);
193     buff = temp;
194   }
195   buff = builder.createConvert(loc, refType, buff);
196   // Convert in case the provided length is not of the integer type that must
197   // be used in boxchar.
198   auto len = builder.createConvert(loc, builder.getCharacterLengthType(),
199                                    box.getLen());
200   return builder.create<fir::EmboxCharOp>(loc, boxCharType, buff, len);
201 }
202 
203 fir::CharBoxValue fir::factory::CharacterExprHelper::toScalarCharacter(
204     const fir::CharArrayBoxValue &box) {
205   if (box.getBuffer().getType().isa<fir::PointerType>())
206     TODO(loc, "concatenating non contiguous character array into a scalar");
207 
208   // TODO: add a fast path multiplying new length at compile time if the info is
209   // in the array type.
210   auto lenType = builder.getCharacterLengthType();
211   auto len = builder.createConvert(loc, lenType, box.getLen());
212   for (auto extent : box.getExtents())
213     len = builder.create<arith::MulIOp>(
214         loc, len, builder.createConvert(loc, lenType, extent));
215 
216   // TODO: typeLen can be improved in compiled constant cases
217   // TODO: allow bare fir.array<> (no ref) conversion here ?
218   auto typeLen = fir::CharacterType::unknownLen();
219   auto kind = recoverCharacterType(box.getBuffer().getType()).getFKind();
220   auto charTy = fir::CharacterType::get(builder.getContext(), kind, typeLen);
221   auto type = fir::ReferenceType::get(charTy);
222   auto buffer = builder.createConvert(loc, type, box.getBuffer());
223   return {buffer, len};
224 }
225 
226 mlir::Value fir::factory::CharacterExprHelper::createEmbox(
227     const fir::CharArrayBoxValue &box) {
228   // Use same embox as for scalar. It's losing the actual data size information
229   // (We do not multiply the length by the array size), but that is what Fortran
230   // call interfaces using boxchar expect.
231   return createEmbox(static_cast<const fir::CharBoxValue &>(box));
232 }
233 
234 /// Get the address of the element at position \p index of the scalar character
235 /// \p buffer.
236 /// \p buffer must be of type !fir.ref<fir.char<k, len>>. The length may be
237 /// unknown. \p index must have any integer type, and is zero based. The return
238 /// value is a singleton address (!fir.ref<!fir.char<kind>>)
239 mlir::Value
240 fir::factory::CharacterExprHelper::createElementAddr(mlir::Value buffer,
241                                                      mlir::Value index) {
242   // The only way to address an element of a fir.ref<char<kind, len>> is to cast
243   // it to a fir.array<len x fir.char<kind>> and use fir.coordinate_of.
244   auto bufferType = buffer.getType();
245   assert(fir::isa_ref_type(bufferType));
246   assert(isCharacterScalar(bufferType));
247   auto charTy = recoverCharacterType(bufferType);
248   auto singleTy = getSingletonCharType(builder.getContext(), charTy.getFKind());
249   auto singleRefTy = builder.getRefType(singleTy);
250   auto extent = fir::SequenceType::getUnknownExtent();
251   if (charTy.getLen() != fir::CharacterType::unknownLen())
252     extent = charTy.getLen();
253   auto coorTy = builder.getRefType(fir::SequenceType::get({extent}, singleTy));
254 
255   auto coor = builder.createConvert(loc, coorTy, buffer);
256   auto i = builder.createConvert(loc, builder.getIndexType(), index);
257   return builder.create<fir::CoordinateOp>(loc, singleRefTy, coor, i);
258 }
259 
260 /// Load a character out of `buff` from offset `index`.
261 /// `buff` must be a reference to memory.
262 mlir::Value
263 fir::factory::CharacterExprHelper::createLoadCharAt(mlir::Value buff,
264                                                     mlir::Value index) {
265   LLVM_DEBUG(llvm::dbgs() << "load a char: " << buff << " type: "
266                           << buff.getType() << " at: " << index << '\n');
267   return builder.create<fir::LoadOp>(loc, createElementAddr(buff, index));
268 }
269 
270 /// Store the singleton character `c` to `str` at offset `index`.
271 /// `str` must be a reference to memory.
272 void fir::factory::CharacterExprHelper::createStoreCharAt(mlir::Value str,
273                                                           mlir::Value index,
274                                                           mlir::Value c) {
275   LLVM_DEBUG(llvm::dbgs() << "store the char: " << c << " into: " << str
276                           << " type: " << str.getType() << " at: " << index
277                           << '\n');
278   auto addr = createElementAddr(str, index);
279   builder.create<fir::StoreOp>(loc, c, addr);
280 }
281 
282 // FIXME: this temp is useless... either fir.coordinate_of needs to
283 // work on "loaded" characters (!fir.array<len x fir.char<kind>>) or
284 // character should never be loaded.
285 // If this is a fir.array<>, allocate and store the value so that
286 // fir.cooridnate_of can be use on the value.
287 mlir::Value fir::factory::CharacterExprHelper::getCharBoxBuffer(
288     const fir::CharBoxValue &box) {
289   auto buff = box.getBuffer();
290   if (fir::isa_char(buff.getType())) {
291     auto newBuff = builder.create<fir::AllocaOp>(loc, buff.getType());
292     builder.create<fir::StoreOp>(loc, buff, newBuff);
293     return newBuff;
294   }
295   return buff;
296 }
297 
298 /// Get the LLVM intrinsic for `memcpy`. Use the 64 bit version.
299 mlir::FuncOp fir::factory::getLlvmMemcpy(fir::FirOpBuilder &builder) {
300   auto ptrTy = builder.getRefType(builder.getIntegerType(8));
301   llvm::SmallVector<mlir::Type> args = {ptrTy, ptrTy, builder.getI64Type(),
302                                         builder.getI1Type()};
303   auto memcpyTy =
304       mlir::FunctionType::get(builder.getContext(), args, llvm::None);
305   return builder.addNamedFunction(builder.getUnknownLoc(),
306                                   "llvm.memcpy.p0i8.p0i8.i64", memcpyTy);
307 }
308 
309 /// Get the LLVM intrinsic for `memmove`. Use the 64 bit version.
310 mlir::FuncOp fir::factory::getLlvmMemmove(fir::FirOpBuilder &builder) {
311   auto ptrTy = builder.getRefType(builder.getIntegerType(8));
312   llvm::SmallVector<mlir::Type> args = {ptrTy, ptrTy, builder.getI64Type(),
313                                         builder.getI1Type()};
314   auto memmoveTy =
315       mlir::FunctionType::get(builder.getContext(), args, llvm::None);
316   return builder.addNamedFunction(builder.getUnknownLoc(),
317                                   "llvm.memmove.p0i8.p0i8.i64", memmoveTy);
318 }
319 
320 /// Get the LLVM intrinsic for `memset`. Use the 64 bit version.
321 mlir::FuncOp fir::factory::getLlvmMemset(fir::FirOpBuilder &builder) {
322   auto ptrTy = builder.getRefType(builder.getIntegerType(8));
323   llvm::SmallVector<mlir::Type> args = {ptrTy, ptrTy, builder.getI64Type(),
324                                         builder.getI1Type()};
325   auto memsetTy =
326       mlir::FunctionType::get(builder.getContext(), args, llvm::None);
327   return builder.addNamedFunction(builder.getUnknownLoc(),
328                                   "llvm.memset.p0i8.p0i8.i64", memsetTy);
329 }
330 
331 /// Get the standard `realloc` function.
332 mlir::FuncOp fir::factory::getRealloc(fir::FirOpBuilder &builder) {
333   auto ptrTy = builder.getRefType(builder.getIntegerType(8));
334   llvm::SmallVector<mlir::Type> args = {ptrTy, builder.getI64Type()};
335   auto reallocTy = mlir::FunctionType::get(builder.getContext(), args, {ptrTy});
336   return builder.addNamedFunction(builder.getUnknownLoc(), "realloc",
337                                   reallocTy);
338 }
339 
340 /// Create a loop to copy `count` characters from `src` to `dest`. Note that the
341 /// KIND indicates the number of bits in a code point. (ASCII, UCS-2, or UCS-4.)
342 void fir::factory::CharacterExprHelper::createCopy(
343     const fir::CharBoxValue &dest, const fir::CharBoxValue &src,
344     mlir::Value count) {
345   auto fromBuff = getCharBoxBuffer(src);
346   auto toBuff = getCharBoxBuffer(dest);
347   LLVM_DEBUG(llvm::dbgs() << "create char copy from: "; src.dump();
348              llvm::dbgs() << " to: "; dest.dump();
349              llvm::dbgs() << " count: " << count << '\n');
350   auto kind = getCharacterKind(src.getBuffer().getType());
351   // If the src and dest are the same KIND, then use memmove to move the bits.
352   // We don't have to worry about overlapping ranges with memmove.
353   if (getCharacterKind(dest.getBuffer().getType()) == kind) {
354     auto bytes = builder.getKindMap().getCharacterBitsize(kind) / 8;
355     auto i64Ty = builder.getI64Type();
356     auto kindBytes = builder.createIntegerConstant(loc, i64Ty, bytes);
357     auto castCount = builder.createConvert(loc, i64Ty, count);
358     auto totalBytes = builder.create<arith::MulIOp>(loc, kindBytes, castCount);
359     auto notVolatile = builder.createBool(loc, false);
360     auto memmv = getLlvmMemmove(builder);
361     auto argTys = memmv.getType().getInputs();
362     auto toPtr = builder.createConvert(loc, argTys[0], toBuff);
363     auto fromPtr = builder.createConvert(loc, argTys[1], fromBuff);
364     builder.create<fir::CallOp>(
365         loc, memmv, mlir::ValueRange{toPtr, fromPtr, totalBytes, notVolatile});
366     return;
367   }
368 
369   // Convert a CHARACTER of one KIND into a CHARACTER of another KIND.
370   builder.create<fir::CharConvertOp>(loc, src.getBuffer(), count,
371                                      dest.getBuffer());
372 }
373 
374 void fir::factory::CharacterExprHelper::createPadding(
375     const fir::CharBoxValue &str, mlir::Value lower, mlir::Value upper) {
376   auto blank = createBlankConstant(getCharacterType(str));
377   // Always create the loop, if upper < lower, no iteration will be
378   // executed.
379   auto toBuff = getCharBoxBuffer(str);
380   fir::factory::DoLoopHelper{builder, loc}.createLoop(
381       lower, upper, [&](fir::FirOpBuilder &, mlir::Value index) {
382         createStoreCharAt(toBuff, index, blank);
383       });
384 }
385 
386 fir::CharBoxValue
387 fir::factory::CharacterExprHelper::createCharacterTemp(mlir::Type type,
388                                                        mlir::Value len) {
389   auto kind = recoverCharacterType(type).getFKind();
390   auto typeLen = fir::CharacterType::unknownLen();
391   // If len is a constant, reflect the length in the type.
392   if (auto cstLen = getIntIfConstant(len))
393     typeLen = *cstLen;
394   auto *ctxt = builder.getContext();
395   auto charTy = fir::CharacterType::get(ctxt, kind, typeLen);
396   llvm::SmallVector<mlir::Value> lenParams;
397   if (typeLen == fir::CharacterType::unknownLen())
398     lenParams.push_back(len);
399   auto ref = builder.allocateLocal(loc, charTy, "", ".chrtmp",
400                                    /*shape=*/llvm::None, lenParams);
401   return {ref, len};
402 }
403 
404 fir::CharBoxValue fir::factory::CharacterExprHelper::createTempFrom(
405     const fir::ExtendedValue &source) {
406   const auto *charBox = source.getCharBox();
407   if (!charBox)
408     fir::emitFatalError(loc, "source must be a fir::CharBoxValue");
409   auto len = charBox->getLen();
410   auto sourceTy = charBox->getBuffer().getType();
411   auto temp = createCharacterTemp(sourceTy, len);
412   if (fir::isa_ref_type(sourceTy)) {
413     createCopy(temp, *charBox, len);
414   } else {
415     auto ref = builder.createConvert(loc, builder.getRefType(sourceTy),
416                                      temp.getBuffer());
417     builder.create<fir::StoreOp>(loc, charBox->getBuffer(), ref);
418   }
419   return temp;
420 }
421 
422 // Simple length one character assignment without loops.
423 void fir::factory::CharacterExprHelper::createLengthOneAssign(
424     const fir::CharBoxValue &lhs, const fir::CharBoxValue &rhs) {
425   auto addr = lhs.getBuffer();
426   mlir::Value val = builder.create<fir::LoadOp>(loc, rhs.getBuffer());
427   auto addrTy = builder.getRefType(val.getType());
428   addr = builder.createConvert(loc, addrTy, addr);
429   builder.create<fir::StoreOp>(loc, val, addr);
430 }
431 
432 /// Returns the minimum of integer mlir::Value \p a and \b.
433 mlir::Value genMin(fir::FirOpBuilder &builder, mlir::Location loc,
434                    mlir::Value a, mlir::Value b) {
435   auto cmp =
436       builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::slt, a, b);
437   return builder.create<mlir::SelectOp>(loc, cmp, a, b);
438 }
439 
440 void fir::factory::CharacterExprHelper::createAssign(
441     const fir::CharBoxValue &lhs, const fir::CharBoxValue &rhs) {
442   auto rhsCstLen = getCompileTimeLength(rhs);
443   auto lhsCstLen = getCompileTimeLength(lhs);
444   bool compileTimeSameLength =
445       lhsCstLen && rhsCstLen && *lhsCstLen == *rhsCstLen;
446 
447   if (compileTimeSameLength && *lhsCstLen == 1) {
448     createLengthOneAssign(lhs, rhs);
449     return;
450   }
451 
452   // Copy the minimum of the lhs and rhs lengths and pad the lhs remainder
453   // if needed.
454   auto copyCount = lhs.getLen();
455   auto idxTy = builder.getIndexType();
456   if (!compileTimeSameLength) {
457     auto lhsLen = builder.createConvert(loc, idxTy, lhs.getLen());
458     auto rhsLen = builder.createConvert(loc, idxTy, rhs.getLen());
459     copyCount = genMin(builder, loc, lhsLen, rhsLen);
460   }
461 
462   // Actual copy
463   createCopy(lhs, rhs, copyCount);
464 
465   // Pad if needed.
466   if (!compileTimeSameLength) {
467     auto one = builder.createIntegerConstant(loc, lhs.getLen().getType(), 1);
468     auto maxPadding = builder.create<arith::SubIOp>(loc, lhs.getLen(), one);
469     createPadding(lhs, copyCount, maxPadding);
470   }
471 }
472 
473 fir::CharBoxValue fir::factory::CharacterExprHelper::createConcatenate(
474     const fir::CharBoxValue &lhs, const fir::CharBoxValue &rhs) {
475   auto lhsLen = builder.createConvert(loc, builder.getCharacterLengthType(),
476                                       lhs.getLen());
477   auto rhsLen = builder.createConvert(loc, builder.getCharacterLengthType(),
478                                       rhs.getLen());
479   mlir::Value len = builder.create<arith::AddIOp>(loc, lhsLen, rhsLen);
480   auto temp = createCharacterTemp(getCharacterType(rhs), len);
481   createCopy(temp, lhs, lhsLen);
482   auto one = builder.createIntegerConstant(loc, len.getType(), 1);
483   auto upperBound = builder.create<arith::SubIOp>(loc, len, one);
484   auto lhsLenIdx = builder.createConvert(loc, builder.getIndexType(), lhsLen);
485   auto fromBuff = getCharBoxBuffer(rhs);
486   auto toBuff = getCharBoxBuffer(temp);
487   fir::factory::DoLoopHelper{builder, loc}.createLoop(
488       lhsLenIdx, upperBound, one,
489       [&](fir::FirOpBuilder &bldr, mlir::Value index) {
490         auto rhsIndex = bldr.create<arith::SubIOp>(loc, index, lhsLenIdx);
491         auto charVal = createLoadCharAt(fromBuff, rhsIndex);
492         createStoreCharAt(toBuff, index, charVal);
493       });
494   return temp;
495 }
496 
497 fir::CharBoxValue fir::factory::CharacterExprHelper::createSubstring(
498     const fir::CharBoxValue &box, llvm::ArrayRef<mlir::Value> bounds) {
499   // Constant need to be materialize in memory to use fir.coordinate_of.
500   auto nbounds = bounds.size();
501   if (nbounds < 1 || nbounds > 2) {
502     mlir::emitError(loc, "Incorrect number of bounds in substring");
503     return {mlir::Value{}, mlir::Value{}};
504   }
505   mlir::SmallVector<mlir::Value> castBounds;
506   // Convert bounds to length type to do safe arithmetic on it.
507   for (auto bound : bounds)
508     castBounds.push_back(
509         builder.createConvert(loc, builder.getCharacterLengthType(), bound));
510   auto lowerBound = castBounds[0];
511   // FIR CoordinateOp is zero based but Fortran substring are one based.
512   auto one = builder.createIntegerConstant(loc, lowerBound.getType(), 1);
513   auto offset = builder.create<arith::SubIOp>(loc, lowerBound, one).getResult();
514   auto addr = createElementAddr(box.getBuffer(), offset);
515   auto kind = getCharacterKind(box.getBuffer().getType());
516   auto charTy = fir::CharacterType::getUnknownLen(builder.getContext(), kind);
517   auto resultType = builder.getRefType(charTy);
518   auto substringRef = builder.createConvert(loc, resultType, addr);
519 
520   // Compute the length.
521   mlir::Value substringLen;
522   if (nbounds < 2) {
523     substringLen =
524         builder.create<arith::SubIOp>(loc, box.getLen(), castBounds[0]);
525   } else {
526     substringLen =
527         builder.create<arith::SubIOp>(loc, castBounds[1], castBounds[0]);
528   }
529   substringLen = builder.create<arith::AddIOp>(loc, substringLen, one);
530 
531   // Set length to zero if bounds were reversed (Fortran 2018 9.4.1)
532   auto zero = builder.createIntegerConstant(loc, substringLen.getType(), 0);
533   auto cdt = builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::slt,
534                                            substringLen, zero);
535   substringLen = builder.create<mlir::SelectOp>(loc, cdt, zero, substringLen);
536 
537   return {substringRef, substringLen};
538 }
539 
540 mlir::Value
541 fir::factory::CharacterExprHelper::createLenTrim(const fir::CharBoxValue &str) {
542   // Note: Runtime for LEN_TRIM should also be available at some
543   // point. For now use an inlined implementation.
544   auto indexType = builder.getIndexType();
545   auto len = builder.createConvert(loc, indexType, str.getLen());
546   auto one = builder.createIntegerConstant(loc, indexType, 1);
547   auto minusOne = builder.createIntegerConstant(loc, indexType, -1);
548   auto zero = builder.createIntegerConstant(loc, indexType, 0);
549   auto trueVal = builder.createIntegerConstant(loc, builder.getI1Type(), 1);
550   auto blank = createBlankConstantCode(getCharacterType(str));
551   mlir::Value lastChar = builder.create<arith::SubIOp>(loc, len, one);
552 
553   auto iterWhile =
554       builder.create<fir::IterWhileOp>(loc, lastChar, zero, minusOne, trueVal,
555                                        /*returnFinalCount=*/false, lastChar);
556   auto insPt = builder.saveInsertionPoint();
557   builder.setInsertionPointToStart(iterWhile.getBody());
558   auto index = iterWhile.getInductionVar();
559   // Look for first non-blank from the right of the character.
560   auto fromBuff = getCharBoxBuffer(str);
561   auto elemAddr = createElementAddr(fromBuff, index);
562   auto codeAddr =
563       builder.createConvert(loc, builder.getRefType(blank.getType()), elemAddr);
564   auto c = builder.create<fir::LoadOp>(loc, codeAddr);
565   auto isBlank =
566       builder.create<arith::CmpIOp>(loc, arith::CmpIPredicate::eq, blank, c);
567   llvm::SmallVector<mlir::Value> results = {isBlank, index};
568   builder.create<fir::ResultOp>(loc, results);
569   builder.restoreInsertionPoint(insPt);
570   // Compute length after iteration (zero if all blanks)
571   mlir::Value newLen =
572       builder.create<arith::AddIOp>(loc, iterWhile.getResult(1), one);
573   auto result =
574       builder.create<mlir::SelectOp>(loc, iterWhile.getResult(0), zero, newLen);
575   return builder.createConvert(loc, builder.getCharacterLengthType(), result);
576 }
577 
578 fir::CharBoxValue
579 fir::factory::CharacterExprHelper::createCharacterTemp(mlir::Type type,
580                                                        int len) {
581   assert(len >= 0 && "expected positive length");
582   auto kind = recoverCharacterType(type).getFKind();
583   auto charType = fir::CharacterType::get(builder.getContext(), kind, len);
584   auto addr = builder.create<fir::AllocaOp>(loc, charType);
585   auto mlirLen =
586       builder.createIntegerConstant(loc, builder.getCharacterLengthType(), len);
587   return {addr, mlirLen};
588 }
589 
590 // Returns integer with code for blank. The integer has the same
591 // size as the character. Blank has ascii space code for all kinds.
592 mlir::Value fir::factory::CharacterExprHelper::createBlankConstantCode(
593     fir::CharacterType type) {
594   auto bits = builder.getKindMap().getCharacterBitsize(type.getFKind());
595   auto intType = builder.getIntegerType(bits);
596   return builder.createIntegerConstant(loc, intType, ' ');
597 }
598 
599 mlir::Value fir::factory::CharacterExprHelper::createBlankConstant(
600     fir::CharacterType type) {
601   return createSingletonFromCode(createBlankConstantCode(type),
602                                  type.getFKind());
603 }
604 
605 void fir::factory::CharacterExprHelper::createAssign(
606     const fir::ExtendedValue &lhs, const fir::ExtendedValue &rhs) {
607   if (auto *str = rhs.getBoxOf<fir::CharBoxValue>()) {
608     if (auto *to = lhs.getBoxOf<fir::CharBoxValue>()) {
609       createAssign(*to, *str);
610       return;
611     }
612   }
613   TODO(loc, "character array assignment");
614   // Note that it is not sure the array aspect should be handled
615   // by this utility.
616 }
617 
618 mlir::Value
619 fir::factory::CharacterExprHelper::createEmboxChar(mlir::Value addr,
620                                                    mlir::Value len) {
621   return createEmbox(fir::CharBoxValue{addr, len});
622 }
623 
624 std::pair<mlir::Value, mlir::Value>
625 fir::factory::CharacterExprHelper::createUnboxChar(mlir::Value boxChar) {
626   using T = std::pair<mlir::Value, mlir::Value>;
627   return toExtendedValue(boxChar).match(
628       [](const fir::CharBoxValue &b) -> T {
629         return {b.getBuffer(), b.getLen()};
630       },
631       [](const fir::CharArrayBoxValue &b) -> T {
632         return {b.getBuffer(), b.getLen()};
633       },
634       [](const auto &) -> T { llvm::report_fatal_error("not a character"); });
635 }
636 
637 bool fir::factory::CharacterExprHelper::isCharacterLiteral(mlir::Type type) {
638   if (auto seqType = type.dyn_cast<fir::SequenceType>())
639     return (seqType.getShape().size() == 1) &&
640            fir::isa_char(seqType.getEleTy());
641   return false;
642 }
643 
644 bool fir::factory::CharacterExprHelper::isCharacterScalar(mlir::Type type) {
645   if (type.isa<fir::BoxCharType>())
646     return true;
647   type = fir::unwrapRefType(type);
648   if (auto boxTy = type.dyn_cast<fir::BoxType>())
649     type = boxTy.getEleTy();
650   type = fir::unwrapRefType(type);
651   return !type.isa<fir::SequenceType>() && fir::isa_char(type);
652 }
653 
654 fir::KindTy
655 fir::factory::CharacterExprHelper::getCharacterKind(mlir::Type type) {
656   assert(isCharacterScalar(type) && "expected scalar character");
657   return recoverCharacterType(type).getFKind();
658 }
659 
660 fir::KindTy
661 fir::factory::CharacterExprHelper::getCharacterOrSequenceKind(mlir::Type type) {
662   return recoverCharacterType(type).getFKind();
663 }
664 
665 bool fir::factory::CharacterExprHelper::isArray(mlir::Type type) {
666   return !isCharacterScalar(type);
667 }
668 
669 bool fir::factory::CharacterExprHelper::hasConstantLengthInType(
670     const fir::ExtendedValue &exv) {
671   auto charTy = recoverCharacterType(fir::getBase(exv).getType());
672   return charTy.hasConstantLen();
673 }
674 
675 mlir::Value
676 fir::factory::CharacterExprHelper::createSingletonFromCode(mlir::Value code,
677                                                            int kind) {
678   auto charType = fir::CharacterType::get(builder.getContext(), kind, 1);
679   auto bits = builder.getKindMap().getCharacterBitsize(kind);
680   auto intType = builder.getIntegerType(bits);
681   auto cast = builder.createConvert(loc, intType, code);
682   auto undef = builder.create<fir::UndefOp>(loc, charType);
683   auto zero = builder.getIntegerAttr(builder.getIndexType(), 0);
684   return builder.create<fir::InsertValueOp>(loc, charType, undef, cast,
685                                             builder.getArrayAttr(zero));
686 }
687 
688 mlir::Value fir::factory::CharacterExprHelper::extractCodeFromSingleton(
689     mlir::Value singleton) {
690   auto type = getCharacterType(singleton);
691   assert(type.getLen() == 1);
692   auto bits = builder.getKindMap().getCharacterBitsize(type.getFKind());
693   auto intType = builder.getIntegerType(bits);
694   auto zero = builder.getIntegerAttr(builder.getIndexType(), 0);
695   return builder.create<fir::ExtractValueOp>(loc, intType, singleton,
696                                              builder.getArrayAttr(zero));
697 }
698 
699 mlir::Value
700 fir::factory::CharacterExprHelper::readLengthFromBox(mlir::Value box) {
701   auto lenTy = builder.getCharacterLengthType();
702   auto size = builder.create<fir::BoxEleSizeOp>(loc, lenTy, box);
703   auto charTy = recoverCharacterType(box.getType());
704   auto bits = builder.getKindMap().getCharacterBitsize(charTy.getFKind());
705   auto width = bits / 8;
706   if (width > 1) {
707     auto widthVal = builder.createIntegerConstant(loc, lenTy, width);
708     return builder.create<arith::DivSIOp>(loc, size, widthVal);
709   }
710   return size;
711 }
712 
713 mlir::Value fir::factory::CharacterExprHelper::getLength(mlir::Value memref) {
714   auto memrefType = memref.getType();
715   auto charType = recoverCharacterType(memrefType);
716   assert(charType && "must be a character type");
717   if (charType.hasConstantLen())
718     return builder.createIntegerConstant(loc, builder.getCharacterLengthType(),
719                                          charType.getLen());
720   if (memrefType.isa<fir::BoxType>())
721     return readLengthFromBox(memref);
722   if (memrefType.isa<fir::BoxCharType>())
723     return createUnboxChar(memref).second;
724 
725   // Length cannot be deduced from memref.
726   return {};
727 }
728