1 //===-- Algorithms to compose sized memory operations ---------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Higher order primitives that build upon the SizedOpT facility. 10 // They constitute the basic blocks for composing memory functions. 11 // This file defines the following operations: 12 // - Skip 13 // - Tail 14 // - HeadTail 15 // - Loop 16 // - Align 17 // 18 // See each class for documentation. 19 //===----------------------------------------------------------------------===// 20 21 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ALGORITHM_H 22 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ALGORITHM_H 23 24 #include "src/string/memory_utils/address.h" // Address 25 #include "src/string/memory_utils/utils.h" // offset_to_next_aligned 26 27 #include <stddef.h> // ptrdiff_t 28 29 namespace __llvm_libc { 30 31 // We are not yet allowed to use asserts in low level memory operations as 32 // assert itself could depend on them. 33 // We define this empty macro so we can enable them as soon as possible and keep 34 // track of invariants. 35 #define LIBC_ASSERT(COND) 36 37 // An operation that allows to skip the specified amount of bytes. 
template <ptrdiff_t Bytes> struct Skip {
  template <typename NextT> struct Then {
    // Sets memory through the fixed-size NextT operation, `Bytes` past `dst`.
    template <typename DstAddrT>
    static inline void set(DstAddrT dst, ubyte value) {
      static_assert(NextT::IS_FIXED_SIZE);
      NextT::set(offsetAddr<Bytes>(dst), value);
    }

    // Fixed-size difference check, `Bytes` past each source address.
    // Returns non-zero iff the compared regions differ.
    template <typename SrcAddrT1, typename SrcAddrT2>
    static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2) {
      static_assert(NextT::IS_FIXED_SIZE);
      return NextT::isDifferent(offsetAddr<Bytes>(src1),
                                offsetAddr<Bytes>(src2));
    }

    // Fixed-size three-way comparison, `Bytes` past each source address.
    template <typename SrcAddrT1, typename SrcAddrT2>
    static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2) {
      static_assert(NextT::IS_FIXED_SIZE);
      return NextT::threeWayCmp(offsetAddr<Bytes>(src1),
                                offsetAddr<Bytes>(src2));
    }

    // Runtime-size three-way comparison: the skipped `Bytes` are consumed
    // here, so the remaining size forwarded to NextT is reduced accordingly.
    // NOTE(review): assumes `runtime_size >= Bytes` — the subtraction is
    // unsigned; callers must guarantee this.
    template <typename SrcAddrT1, typename SrcAddrT2>
    static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2,
                                      size_t runtime_size) {
      static_assert(NextT::IS_RUNTIME_SIZE);
      return NextT::threeWayCmp(offsetAddr<Bytes>(src1),
                                offsetAddr<Bytes>(src2), runtime_size - Bytes);
    }
  };
};

// Compute the address of a tail operation, i.e. the address `Size` bytes
// before the end of the `runtime_size`-byte buffer starting at `addr`.
// Because of the runtime size, we lose the alignment information.
// NOTE(review): assumes `runtime_size >= Size` — the subtraction is unsigned.
template <size_t Size, typename AddrT>
static auto tailAddr(AddrT addr, size_t runtime_size) {
  static_assert(IsAddressType<AddrT>::Value);
  return offsetAddrAssumeAligned<1>(addr, runtime_size - Size);
}

// Perform the operation on the last 'Size' bytes of the buffer.
//
// e.g. with
// [1234567812345678123]
// [__XXXXXXXXXXXXXX___]
// [________XXXXXXXX___]
//
// Precondition: `runtime_size >= Size`.
template <typename SizedOpT> struct Tail {
  static_assert(SizedOpT::IS_FIXED_SIZE);
  static constexpr bool IS_RUNTIME_SIZE = true;
  static constexpr size_t SIZE = SizedOpT::SIZE;

  // Copies the last SIZE bytes of the `runtime_size`-byte buffers.
  template <typename DstAddrT, typename SrcAddrT>
  static inline void copy(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
    SizedOpT::copy(tailAddr<SIZE>(dst, runtime_size),
                   tailAddr<SIZE>(src, runtime_size));
  }

  // Moves the last SIZE bytes of the `runtime_size`-byte buffers.
  template <typename DstAddrT, typename SrcAddrT>
  static inline void move(DstAddrT dst, SrcAddrT src, size_t runtime_size) {
    SizedOpT::move(tailAddr<SIZE>(dst, runtime_size),
                   tailAddr<SIZE>(src, runtime_size));
  }

  // Sets the last SIZE bytes of the `runtime_size`-byte buffer to `value`.
  template <typename DstAddrT>
  static inline void set(DstAddrT dst, ubyte value, size_t runtime_size) {
    SizedOpT::set(tailAddr<SIZE>(dst, runtime_size), value);
  }

  // Returns non-zero iff the last SIZE bytes of the two buffers differ.
  template <typename SrcAddrT1, typename SrcAddrT2>
  static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2,
                                     size_t runtime_size) {
    return SizedOpT::isDifferent(tailAddr<SIZE>(src1, runtime_size),
                                 tailAddr<SIZE>(src2, runtime_size));
  }

  // Three-way comparison of the last SIZE bytes of the two buffers.
  template <typename SrcAddrT1, typename SrcAddrT2>
  static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2,
                                    size_t runtime_size) {
    return SizedOpT::threeWayCmp(tailAddr<SIZE>(src1, runtime_size),
                                 tailAddr<SIZE>(src2, runtime_size));
  }
};

// Perform the operation on the first and the last `SizedOpT::Size` bytes of the
// buffer. This is useful for overlapping operations.
//
// e.g. with
// [1234567812345678123]
// [__XXXXXXXXXXXXXX___]
// [__XXXXXXXX_________]
// [________XXXXXXXX___]
//
// Precondition: `runtime_size >= Size && runtime_size <= 2 x Size`.
133 template <typename SizedOpT> struct HeadTail { 134 static_assert(SizedOpT::IS_FIXED_SIZE); 135 static constexpr bool IS_RUNTIME_SIZE = true; 136 137 template <typename DstAddrT, typename SrcAddrT> 138 static inline void copy(DstAddrT dst, SrcAddrT src, size_t runtime_size) { 139 LIBC_ASSERT(runtime_size >= SizedOpT::SIZE); 140 SizedOpT::copy(dst, src); 141 Tail<SizedOpT>::copy(dst, src, runtime_size); 142 } 143 144 template <typename DstAddrT, typename SrcAddrT> 145 static inline void move(DstAddrT dst, SrcAddrT src, size_t runtime_size) { 146 LIBC_ASSERT(runtime_size >= SizedOpT::SIZE); 147 static constexpr size_t BLOCK_SIZE = SizedOpT::SIZE; 148 // The load and store operations can be performed in any order as long as 149 // they are not interleaved. More investigations are needed to determine the 150 // best order. 151 auto head = SizedOpT::load(src); 152 auto tail = SizedOpT::load(tailAddr<BLOCK_SIZE>(src, runtime_size)); 153 SizedOpT::store(tailAddr<BLOCK_SIZE>(dst, runtime_size), tail); 154 SizedOpT::store(dst, head); 155 } 156 157 template <typename DstAddrT> 158 static inline void set(DstAddrT dst, ubyte value, size_t runtime_size) { 159 LIBC_ASSERT(runtime_size >= SizedOpT::SIZE); 160 SizedOpT::set(dst, value); 161 Tail<SizedOpT>::set(dst, value, runtime_size); 162 } 163 164 template <typename SrcAddrT1, typename SrcAddrT2> 165 static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2, 166 size_t runtime_size) { 167 LIBC_ASSERT(runtime_size >= SizedOpT::SIZE); 168 // Two strategies can be applied here: 169 // 1. Compute head and tail and compose them with a bitwise or operation. 170 // 2. Stop early if head is different. 171 // We chose the later because HeadTail operations are typically performed 172 // with sizes ranging from 4 to 256 bytes. The cost of the loads is then 173 // significantly larger than the cost of the branch. 
174 if (const uint64_t res = SizedOpT::isDifferent(src1, src2)) 175 return res; 176 return Tail<SizedOpT>::isDifferent(src1, src2, runtime_size); 177 } 178 179 template <typename SrcAddrT1, typename SrcAddrT2> 180 static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2, 181 size_t runtime_size) { 182 LIBC_ASSERT(runtime_size >= SizedOpT::SIZE && 183 runtime_size <= 2 * SizedOpT::SIZE); 184 if (const int32_t res = SizedOpT::threeWayCmp(src1, src2)) 185 return res; 186 return Tail<SizedOpT>::threeWayCmp(src1, src2, runtime_size); 187 } 188 }; 189 190 // Simple loop ending with a Tail operation. 191 // 192 // e.g. with 193 // [12345678123456781234567812345678] 194 // [__XXXXXXXXXXXXXXXXXXXXXXXXXXXX___] 195 // [__XXXXXXXX_______________________] 196 // [__________XXXXXXXX_______________] 197 // [__________________XXXXXXXX_______] 198 // [______________________XXXXXXXX___] 199 // 200 // Precondition: 201 // - runtime_size >= Size 202 template <typename SizedOpT> struct Loop { 203 static_assert(SizedOpT::IS_FIXED_SIZE); 204 static constexpr bool IS_RUNTIME_SIZE = true; 205 static constexpr size_t BLOCK_SIZE = SizedOpT::SIZE; 206 207 template <typename DstAddrT, typename SrcAddrT> 208 static inline void copy(DstAddrT dst, SrcAddrT src, size_t runtime_size) { 209 size_t offset = 0; 210 do { 211 SizedOpT::copy(offsetAddrMultiplesOf<BLOCK_SIZE>(dst, offset), 212 offsetAddrMultiplesOf<BLOCK_SIZE>(src, offset)); 213 offset += BLOCK_SIZE; 214 } while (offset < runtime_size - BLOCK_SIZE); 215 Tail<SizedOpT>::copy(dst, src, runtime_size); 216 } 217 218 // Move forward suitable when dst < src. We load the tail bytes before 219 // handling the loop. 220 // 221 // e.g. 
Moving two bytes 222 // [ | | | | |] 223 // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___] 224 // [_________________________LLLLLLLL___] 225 // [___LLLLLLLL_________________________] 226 // [_SSSSSSSS___________________________] 227 // [___________LLLLLLLL_________________] 228 // [_________SSSSSSSS___________________] 229 // [___________________LLLLLLLL_________] 230 // [_________________SSSSSSSS___________] 231 // [_______________________SSSSSSSS_____] 232 template <typename DstAddrT, typename SrcAddrT> 233 static inline void move(DstAddrT dst, SrcAddrT src, size_t runtime_size) { 234 const auto tail_value = 235 SizedOpT::load(tailAddr<BLOCK_SIZE>(src, runtime_size)); 236 size_t offset = 0; 237 do { 238 SizedOpT::move(offsetAddrMultiplesOf<BLOCK_SIZE>(dst, offset), 239 offsetAddrMultiplesOf<BLOCK_SIZE>(src, offset)); 240 offset += BLOCK_SIZE; 241 } while (offset < runtime_size - BLOCK_SIZE); 242 SizedOpT::store(tailAddr<BLOCK_SIZE>(dst, runtime_size), tail_value); 243 } 244 245 // Move backward suitable when dst > src. We load the head bytes before 246 // handling the loop. 247 // 248 // e.g. 
Moving two bytes 249 // [ | | | | |] 250 // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___] 251 // [___LLLLLLLL_________________________] 252 // [_________________________LLLLLLLL___] 253 // [___________________________SSSSSSSS_] 254 // [_________________LLLLLLLL___________] 255 // [___________________SSSSSSSS_________] 256 // [_________LLLLLLLL___________________] 257 // [___________SSSSSSSS_________________] 258 // [_____SSSSSSSS_______________________] 259 template <typename DstAddrT, typename SrcAddrT> 260 static inline void move_backward(DstAddrT dst, SrcAddrT src, 261 size_t runtime_size) { 262 const auto head_value = SizedOpT::load(src); 263 ptrdiff_t offset = runtime_size - BLOCK_SIZE; 264 do { 265 SizedOpT::move(offsetAddrMultiplesOf<BLOCK_SIZE>(dst, offset), 266 offsetAddrMultiplesOf<BLOCK_SIZE>(src, offset)); 267 offset -= BLOCK_SIZE; 268 } while (offset >= 0); 269 SizedOpT::store(dst, head_value); 270 } 271 272 template <typename DstAddrT> 273 static inline void set(DstAddrT dst, ubyte value, size_t runtime_size) { 274 size_t offset = 0; 275 do { 276 SizedOpT::set(offsetAddrMultiplesOf<BLOCK_SIZE>(dst, offset), value); 277 offset += BLOCK_SIZE; 278 } while (offset < runtime_size - BLOCK_SIZE); 279 Tail<SizedOpT>::set(dst, value, runtime_size); 280 } 281 282 template <typename SrcAddrT1, typename SrcAddrT2> 283 static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2, 284 size_t runtime_size) { 285 size_t offset = 0; 286 do { 287 if (uint64_t res = SizedOpT::isDifferent( 288 offsetAddrMultiplesOf<BLOCK_SIZE>(src1, offset), 289 offsetAddrMultiplesOf<BLOCK_SIZE>(src2, offset))) 290 return res; 291 offset += BLOCK_SIZE; 292 } while (offset < runtime_size - BLOCK_SIZE); 293 return Tail<SizedOpT>::isDifferent(src1, src2, runtime_size); 294 } 295 296 template <typename SrcAddrT1, typename SrcAddrT2> 297 static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2, 298 size_t runtime_size) { 299 size_t offset = 0; 300 do { 301 if (int32_t res = 
SizedOpT::threeWayCmp( 302 offsetAddrMultiplesOf<BLOCK_SIZE>(src1, offset), 303 offsetAddrMultiplesOf<BLOCK_SIZE>(src2, offset))) 304 return res; 305 offset += BLOCK_SIZE; 306 } while (offset < runtime_size - BLOCK_SIZE); 307 return Tail<SizedOpT>::threeWayCmp(src1, src2, runtime_size); 308 } 309 }; 310 311 // Aligns using a statically-sized operation, then calls the subsequent NextT 312 // operation. 313 // 314 // e.g. A 16-byte Destination Aligned 32-byte Loop Copy can be written as: 315 // Align<_16, Arg::Dst>::Then<Loop<_32>>::copy(dst, src, runtime_size); 316 enum class Arg { _1, _2, Dst = _1, Src = _2, Lhs = _1, Rhs = _2 }; 317 template <typename SizedOpT, Arg AlignOn = Arg::_1> struct Align { 318 static_assert(SizedOpT::IS_FIXED_SIZE); 319 320 template <typename NextT> struct Then { 321 static_assert(NextT::IS_RUNTIME_SIZE); 322 323 template <typename DstAddrT, typename SrcAddrT> 324 static inline void copy(DstAddrT dst, SrcAddrT src, size_t runtime_size) { 325 SizedOpT::copy(dst, src); 326 auto aligned = align(dst, src, runtime_size); 327 NextT::copy(aligned.arg1, aligned.arg2, aligned.size); 328 } 329 330 // Move forward suitable when dst < src. The alignment is performed with 331 // an HeadTail operation of size ∈ [Alignment, 2 x Alignment]. 332 // 333 // e.g. Moving two bytes and making sure src is then aligned. 334 // [ | | | | ] 335 // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_] 336 // [____LLLLLLLL_____________________] 337 // [___________LLLLLLLL______________] 338 // [_SSSSSSSS________________________] 339 // [________SSSSSSSS_________________] 340 // 341 // e.g. Moving two bytes and making sure dst is then aligned. 
342 // [ | | | | ] 343 // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_] 344 // [____LLLLLLLL_____________________] 345 // [______LLLLLLLL___________________] 346 // [_SSSSSSSS________________________] 347 // [___SSSSSSSS______________________] 348 template <typename DstAddrT, typename SrcAddrT> 349 static inline void move(DstAddrT dst, SrcAddrT src, size_t runtime_size) { 350 auto aligned_after_begin = align(dst, src, runtime_size); 351 // We move pointers forward by Size so we can perform HeadTail. 352 auto aligned = aligned_after_begin.stepForward(); 353 HeadTail<SizedOpT>::move(dst, src, runtime_size - aligned.size); 354 NextT::move(aligned.arg1, aligned.arg2, aligned.size); 355 } 356 357 // Move backward suitable when dst > src. The alignment is performed with 358 // an HeadTail operation of size ∈ [Alignment, 2 x Alignment]. 359 // 360 // e.g. Moving two bytes backward and making sure src is then aligned. 361 // [ | | | | ] 362 // [____XXXXXXXXXXXXXXXXXXXXXXXX_____] 363 // [ _________________LLLLLLLL_______] 364 // [ ___________________LLLLLLLL_____] 365 // [____________________SSSSSSSS_____] 366 // [______________________SSSSSSSS___] 367 // 368 // e.g. Moving two bytes and making sure dst is then aligned. 369 // [ | | | | ] 370 // [____XXXXXXXXXXXXXXXXXXXXXXXX_____] 371 // [ _______________LLLLLLLL_________] 372 // [ ___________________LLLLLLLL_____] 373 // [__________________SSSSSSSS_______] 374 // [______________________SSSSSSSS___] 375 template <typename DstAddrT, typename SrcAddrT> 376 static inline void move_backward(DstAddrT dst, SrcAddrT src, 377 size_t runtime_size) { 378 const auto dst_end = offsetAddrAssumeAligned<1>(dst, runtime_size); 379 const auto src_end = offsetAddrAssumeAligned<1>(src, runtime_size); 380 auto aligned_after_end = align(dst_end, src_end, 0); 381 // We move pointers back by 2 x Size so we can perform HeadTail. 
382 auto aligned = aligned_after_end.stepBack().stepBack(); 383 HeadTail<SizedOpT>::move(aligned.arg1, aligned.arg2, aligned.size); 384 NextT::move_backward(dst, src, runtime_size - aligned.size); 385 } 386 387 template <typename DstAddrT> 388 static inline void set(DstAddrT dst, ubyte value, size_t runtime_size) { 389 SizedOpT::set(dst, value); 390 DstAddrT _(nullptr); 391 auto aligned = align(dst, _, runtime_size); 392 NextT::set(aligned.arg1, value, aligned.size); 393 } 394 395 template <typename SrcAddrT1, typename SrcAddrT2> 396 static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2, 397 size_t runtime_size) { 398 if (const uint64_t res = SizedOpT::isDifferent(src1, src2)) 399 return res; 400 auto aligned = align(src1, src2, runtime_size); 401 return NextT::isDifferent(aligned.arg1, aligned.arg2, aligned.size); 402 } 403 404 template <typename SrcAddrT1, typename SrcAddrT2> 405 static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2, 406 size_t runtime_size) { 407 if (const int32_t res = SizedOpT::threeWayCmp(src1, src2)) 408 return res; 409 auto aligned = align(src1, src2, runtime_size); 410 return NextT::threeWayCmp(aligned.arg1, aligned.arg2, aligned.size); 411 } 412 }; 413 414 private: 415 static constexpr size_t ALIGN_OP_SIZE = SizedOpT::SIZE; 416 static_assert(ALIGN_OP_SIZE > 1); 417 418 template <typename Arg1AddrT, typename Arg2AddrT> struct Aligned { 419 Arg1AddrT arg1; 420 Arg2AddrT arg2; 421 size_t size; 422 423 Aligned stepForward() const { 424 return Aligned{offsetAddrMultiplesOf<ALIGN_OP_SIZE>(arg1, ALIGN_OP_SIZE), 425 offsetAddrMultiplesOf<ALIGN_OP_SIZE>(arg2, ALIGN_OP_SIZE), 426 size - ALIGN_OP_SIZE}; 427 } 428 429 Aligned stepBack() const { 430 return Aligned{offsetAddrMultiplesOf<ALIGN_OP_SIZE>(arg1, -ALIGN_OP_SIZE), 431 offsetAddrMultiplesOf<ALIGN_OP_SIZE>(arg2, -ALIGN_OP_SIZE), 432 size + ALIGN_OP_SIZE}; 433 } 434 }; 435 436 template <typename Arg1AddrT, typename Arg2AddrT> 437 static auto makeAligned(Arg1AddrT arg1, 
Arg2AddrT arg2, size_t size) { 438 return Aligned<Arg1AddrT, Arg2AddrT>{arg1, arg2, size}; 439 } 440 441 template <typename Arg1AddrT, typename Arg2AddrT> 442 static auto align(Arg1AddrT arg1, Arg2AddrT arg2, size_t runtime_size) { 443 static_assert(IsAddressType<Arg1AddrT>::Value); 444 static_assert(IsAddressType<Arg2AddrT>::Value); 445 if constexpr (AlignOn == Arg::_1) { 446 auto offset = offset_to_next_aligned<ALIGN_OP_SIZE>(arg1.ptr_); 447 return makeAligned(offsetAddrAssumeAligned<ALIGN_OP_SIZE>(arg1, offset), 448 offsetAddrAssumeAligned<1>(arg2, offset), 449 runtime_size - offset); 450 } else if constexpr (AlignOn == Arg::_2) { 451 auto offset = offset_to_next_aligned<ALIGN_OP_SIZE>(arg2.ptr_); 452 return makeAligned(offsetAddrAssumeAligned<1>(arg1, offset), 453 offsetAddrAssumeAligned<ALIGN_OP_SIZE>(arg2, offset), 454 runtime_size - offset); 455 } else { 456 DeferredStaticAssert("AlignOn must be either Arg::_1 or Arg::_2"); 457 } 458 } 459 }; 460 461 } // namespace __llvm_libc 462 463 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ALGORITHM_H 464